SUMMARY
This R code is used to estimate the
relationship between oxygen consumption (MO2) and ambient oxygen partial
pressure (PO2) in the Common galaxias (Galaxias maculatus). It
is also used to estimate the critical partial pressure of oxygen for
aerobic metabolism (Pcrit), which is commonly understood as the
threshold below which oxygen consumption rate can no longer be
sustained. The associated article is “The role of osmorespiratory
compromise in hypoxia tolerance of the purportedly oxyconforming teleost
Galaxias maculatus”.
AIM The article aims to test whether Galaxias maculatus can maintain oxygen consumption (MO2) as ambient PO2 falls, and if so, at what level it reaches critical partial pressure of oxygen for aerobic metabolism (Pcrit).
AUTHORS
To be added
AFFILIATIONS
To be added
AIM
To be added
These are the settings for the HTML output. We will use this to make our index file on GitHub.
# knitr chunk options for the rendered HTML document
knitr::opts_chunk$set(
message = FALSE, # suppress package start-up and other messages in the output
warning = FALSE, # no warnings
cache = TRUE,# caching to save time when knitting
tidy = TRUE # reformat chunk code with formatR when rendering
)
These are the R packages required for this script. You will need to install a package called pacman to run the p_load function.
# This installs (if missing) and loads all required packages via pacman.
# pacman itself must be installed first: install.packages("pacman")
# NOTE(review): "round_hms" is a *function* in the hms package, not a CRAN
# package -- p_load may try (and fail) to install it; confirm it is needed.
pacman::p_load("ggplot2",
"ggthemes",
"ggfortify",
"gtExtras",
"igraph",
"dagitty",
"ggdag",
"ggridges",
"gghalves",
"ggExtra",
"gridExtra",
"corrplot",
"RColorBrewer",
"gt",
"gtsummary",
"grid",
"plotly", # data visualisation
"tidyverse",
"janitor",
"readxl",
"broom.mixed",
"data.table",
"devtools",
"hms",
"round_hms", # data tidying (see NOTE above)
"marginaleffects",
"brms",
"rstan",
"performance",
"emmeans",
"tidybayes",
"vegan",
"betareg",
"lme4",
"car",
"lmerTest",
"qqplotr",
"respirometry",
"mclust",
"furrr",
# modelling
"datawizard",
"SRS" # data manipulation
)
Here are some custom functions used within this script.
bayes_incremental_regression_by_id: A custom
function to build Bayesian incremental regressions in parallel
# Instead we could also use a distributional regression approach, by
# specifically modelling the variance by DO (e.g. sigma ~ DO). Weighting may
# not be required in this case, I don't think higher density of vaules in a
# given space will effect Bayesian estimates like it does in frequentist
# models. See discourse https://discourse.mc-stan.org/t/weights-in-brm/4278
# <br>
# Fit a family of nested Bayesian regressions (intercept-only through cubic
# polynomial) for one individual, and return them as a named list.
#
# @param id_i          value identifying one individual (e.g. one fish id).
# @param id_name       name of the id column in `data` (string).
# @param data          data frame containing `id_name`, `predictor`, `response`.
# @param predictor     name of the predictor column (string), e.g. "DO".
# @param response      name of the response column (string), e.g. "MO2_g".
# @param save_models   logical passed to save_pars(all = ...) so models can be
#                      post-processed (e.g. moment matching) later.
# @param mod_output_wd directory where brm() caches each fitted model (.rds).
# @return named list of brmsfit objects, names "<id_i>_lm_0" .. "<id_i>_lm_3".
bayes_incremental_regression_by_id <- function(id_i, id_name, data, predictor, response,
                                               save_models, mod_output_wd) {
  # Data for the current individual only.
  df_i <- data %>%
    dplyr::filter(!!rlang::sym(id_name) == id_i)
  # Right-hand sides of increasing complexity; degree 0 is intercept-only.
  rhs_terms <- c("1",
                 predictor,
                 sprintf("poly(%s, 2)", predictor),
                 sprintf("poly(%s, 3)", predictor))
  models <- list()
  # One loop replaces the four near-identical brm() calls of the original
  # (identical settings, only the formula and file name differ).
  for (degree in 0:3) {
    mod_name <- paste0(id_i, "_lm_", degree)
    models[[mod_name]] <- brm(
      bf(reformulate(rhs_terms[degree + 1], response), family = gaussian()),
      data = df_i, cores = 4, seed = 143019,
      save_pars = save_pars(all = save_models),
      sample_prior = FALSE, silent = TRUE,
      # brm() reuses the cached fit at this path if it already exists.
      file = paste0(mod_output_wd, "/", mod_name)
    )
  }
  models
}
load_rds: A custom function to load all rds models in a directory and store in a list
load_rds <- function(model_dw) {
  # Load every .rds file found in `model_dw` and return the objects as a
  # named list; each element is named after its file, extension stripped.
  rds_paths <- list.files(path = model_dw, pattern = "\\.rds$", full.names = TRUE)
  loaded <- lapply(rds_paths, readRDS)
  names(loaded) <- tools::file_path_sans_ext(basename(rds_paths))
  loaded
}
incremental_regression_bayes_fits: A custom function for pulling model fits, loo and r2
# Summarise model comparison metrics for a named list of brmsfit objects
# (names formatted "<id>_lm_<degree>", as built by
# bayes_incremental_regression_by_id). Returns one row per model with LOO
# metrics, fixed-effect estimates (95% CI, pivoted wide), Bayesian R2, and
# the id / model_type parsed back out of the model name.
incremental_regression_bayes_fits <- function(models) {
loo_results_list <- list()
# Iterate over the names of the models
for (mod_name in names(models)) {
# Extract the model
mod_i <- models[[mod_name]]
# Compute LOO results
mod_loo_results_i <- loo::loo(mod_i)
# Extract relevant LOO metrics
# NOTE(review): in loo >= 2.0 these summaries live in the $estimates
# matrix; direct $elpd_loo/$p_loo/$looic access assumes an object that
# still exposes them as top-level elements -- confirm they are non-NULL
# with the installed loo/brms versions.
elpd_loo_i <- mod_loo_results_i$elpd_loo
p_loo_i <- mod_loo_results_i$p_loo
looic_i <- mod_loo_results_i$looic
# Create a data frame with metrics
df_i <- data.frame(
elpd_loo = elpd_loo_i,
p_loo = p_loo_i,
looic = looic_i,
model = mod_name
)
# Fixed-effect posterior summaries (tidy() here comes from broom.mixed).
est_i <- tidy(mod_i, effects = "fixed", conf.int = TRUE) %>%
dplyr::select(term, estimate, conf.low, conf.high) %>%
tidyr::pivot_wider(
names_from = term, # Use `term` as column names
values_from = c(estimate, conf.low, conf.high), # Values to pivot
names_sep = "_" # Add a separator to column names
)
# One wide row per model: metrics + estimates side by side.
df_i <- cbind(df_i, est_i)
# Store the data frame in the list
loo_results_list[[mod_name]] <- df_i
}
# Combine all per-model rows
loo_results_combined <- bind_rows(loo_results_list)
# Get R2 (one row per model, keyed by the list name via .id)
r2_results <- map_dfr(models, ~ as.data.frame(bayes_R2(.x)), .id = "model") %>%
tibble::remove_rownames()
# Combine R2 and loo results; parse "<id>_lm_<degree>" into id / model_type
model_fit_df <- dplyr::full_join(loo_results_combined, r2_results, by = "model") %>%
dplyr::select(model, everything()) %>%
dplyr::rename(r2 = Estimate,
r2_error = Est.Error,
r2_q2.5 = Q2.5,
r2_q97.5 = Q97.5) %>%
dplyr::mutate(id = sub("_(lm_\\d+)$", "", model),
model_type = sub("^.*_(lm_\\d+)$", "\\1", model))
return(model_fit_df)
}
bayes_mod_predictions: This function extracts the predicted values from a list of models and combines them with the original data file used for each model
bayes_mod_predictions <- function(models, original_data) {
  # For every fitted model, pull the posterior predictions (fitted values
  # with 95% intervals), attach them column-wise to the rows of
  # `original_data` belonging to the same individual, and stack the results
  # into one data frame.
  prediction_list <- lapply(names(models), function(mod_name) {
    fit <- models[[mod_name]]
    # Posterior summaries of the fitted values, renamed to pred_* columns,
    # tagged with the model name and the id / model_type parsed from it.
    preds <- as.data.frame(fitted(fit, summary = TRUE)) %>%
      dplyr::mutate(model = mod_name,
                    id = sub("_(lm_\\d+)$", "", mod_name),
                    model_type = sub("^.*_(lm_\\d+)$", "\\1", mod_name)) %>%
      dplyr::rename(pred_lower = Q2.5, pred_upper = Q97.5,
                    predicted = Estimate, pred_error = Est.Error) %>%
      dplyr::select(model, everything())
    # Rows of the original data this model was fitted on (same id).
    rows_for_id <- original_data %>%
      dplyr::filter(id == preds$id[1])
    cbind(preds, rows_for_id)
  })
  bind_rows(prediction_list)
}
calcSMR: authored by D. Chabot; used to estimate SMR with several different methods. See Chabot, Steffensen and Farrell (2016), DOI: 10.1111/jfb.12845
# Estimate standard metabolic rate (SMR) from a vector of MO2 observations
# using several estimators at once (after Chabot, Steffensen & Farrell 2016).
#
# @param Y numeric vector of MO2 values (positive; callers pass abs() values).
# @param q quantiles of Y to report (q[3] = 0.2 is used downstream).
# @param G number(s) of Gaussian mixture components to try in Mclust.
# @return list with:
#   mlnd    - mean of the lowest normal distribution (chosen Mclust class),
#   quant   - the requested quantiles of Y,
#   low10   - mean of the 10 lowest values (NA-padded if length(Y) < 10),
#   low10pc - mean of the lowest 10% after discarding the 5 lowest values as
#             outliers (Herrmann & Enders 2000),
#   cl      - Mclust classification of every observation,
#   CVmlnd  - coefficient of variation (%) within the chosen class.
calcSMR <- function(Y, q = c(0.1, 0.15, 0.2, 0.25, 0.3), G = 1:4) {
  u <- sort(Y)
  the.Mclust <- Mclust(Y, G = G)
  cl <- the.Mclust$classification
  # Sometimes the class containing SMR is not class 1: take the lowest-
  # numbered class that contains > 10% of the observations.
  cl2 <- as.data.frame(table(cl))
  cl2$cl <- as.numeric(levels(cl2$cl))
  # BUG FIX: the original computed 0.1 * length(time), but no `time` object
  # exists in this scope, so R resolved the base function time() (length 1)
  # and the threshold degenerated to Freq >= 0.1, i.e. every non-empty class
  # passed and class 1 was always chosen. The original comment states the
  # intent -- 10% of the *cases* -- which is length(Y).
  valid <- cl2$Freq >= 0.1 * length(Y)
  # Defensive fallback: if no class reaches 10%, consider all classes rather
  # than taking min() of an empty vector (which would yield Inf).
  if (!any(valid)) {
    valid <- rep(TRUE, nrow(cl2))
  }
  the.cl <- min(cl2$cl[valid])
  left.distr <- Y[the.Mclust$classification == the.cl]
  mlnd <- the.Mclust$parameters$mean[the.cl]
  CVmlnd <- sd(left.distr) / mlnd * 100
  quant <- quantile(Y, q)
  low10 <- mean(u[1:10])
  # Remove 5 outliers, keep lowest 10% of the rest, average
  # (Herrmann & Enders 2000).
  low10pc <- mean(u[6:(5 + round(0.1 * (length(u) - 5)))])
  list(mlnd = mlnd, quant = quant, low10 = low10, low10pc = low10pc, cl = cl,
       CVmlnd = CVmlnd)
}
calcO2crit: authored by D. Chabot; used to estimate O2crit (Pcrit). Claireaux and Chabot (2016), DOI: 10.1111/jfb.12833
Note: O2 is assumed to be expressed as percentage of dissolved oxygen saturation (DO) for this function to work
# Estimate the critical oxygen level (O2crit / Pcrit): the DO below which MO2
# can no longer be maintained at SMR. Fits a regression through the
# "conforming" (oxygen-dependent) observations and returns the DO at which
# that line crosses SMR.
#
# Data:  data frame with columns MO2 and DO (DO in % air saturation).
# SMR:   the fish's standard metabolic rate, same units as MO2.
# lowestMO2: MO2 threshold separating regulating from conforming points;
#   defaults to the 5th percentile of MO2 at DO >= 80%.
# gapLimit: largest acceptable DO gap when adding the pivot point (Step 2B).
# max.nb.MO2.for.reg: cap on the number of points used in the regression.
#
# Returns (as the value of the final assignment, so invisibly) a list with
# o2crit, SMR, point counts, the original data, the method used, the fitted
# model, r2, P, and the indices of the points used.
calcO2crit <- function(Data, SMR, lowestMO2 = NA, gapLimit = 4, max.nb.MO2.for.reg = 20) {
# AUTHOR: Denis Chabot, Institut Maurice-Lamontagne, DFO, Canada first
# version written in June 2009 last updated in January 2015
method = "LS_reg" # will become 'through_origin' if intercept is > 0
if (is.na(lowestMO2))
lowestMO2 = quantile(Data$MO2[Data$DO >= 80], p = 0.05)
# Step 1: identify points where MO2 is proportional to DO
# (below the pivot: the lowest DO at which MO2 still reached lowestMO2)
geqSMR = Data$MO2 >= lowestMO2
pivotDO = min(Data$DO[geqSMR])
lethal = Data$DO < pivotDO
N_under_SMR = sum(lethal) # points available for regression?
final_N_under_SMR = lethal # some points may be removed at Step 4
lastMO2reg = Data$MO2[Data$DO == pivotDO] # last MO2 when regulating
if (N_under_SMR > 1)
theMod = lm(MO2 ~ DO, data = Data[lethal, ])
# Step 2, add one or more point at or above SMR 2A, when there are fewer
# than 3 valid points to calculate a regression
if (N_under_SMR < 3) {
missing = 3 - sum(lethal)
not.lethal = Data$DO[geqSMR]
DOlimit = max(sort(not.lethal)[1:missing]) # highest DO acceptable
# to reach a N of 3
addedPoints = Data$DO <= DOlimit
lethal = lethal | addedPoints
theMod = lm(MO2 ~ DO, data = Data[lethal, ])
}
# 2B, add pivotDO to the fit when Step 1 yielded 3 or more values?
if (N_under_SMR >= 3) {
lethalB = Data$DO <= pivotDO # has one more value than 'lethal'
regA = theMod
regB = lm(MO2 ~ DO, data = Data[lethalB, ])
# accept the extra point only if it does not flatten the slope much,
# is not far away in DO, and its MO2 is not below SMR
large_slope_drop = (coef(regA)[2]/coef(regB)[2]) > 1.1 # arbitrary
large_DO_gap = (max(Data$DO[lethalB]) - max(Data$DO[lethal])) > gapLimit
tooSmallMO2 = lastMO2reg < SMR
if (!large_slope_drop & !large_DO_gap & !tooSmallMO2)
{
lethal = lethalB
theMod = regB
} # otherwise we do not accept the additional point
}
# Step 3 if the user wants to limit the number of points in the regression
# (keeps the max.nb.MO2.for.reg observations with the lowest DO)
if (!is.na(max.nb.MO2.for.reg) & sum(lethal) > max.nb.MO2.for.reg) {
Ranks = rank(Data$DO)
lethal = Ranks <= max.nb.MO2.for.reg
theMod = lm(MO2 ~ DO, data = Data[lethal, ])
final_N_under_SMR = max.nb.MO2.for.reg
}
# Step 4: if any point above the pivot sits above the fitted line, pull the
# single largest positive residual into a weighted refit (it gets weight 1,
# the conforming points share a total weight of 1)
predMO2 = as.numeric(predict(theMod, data.frame(DO = Data$DO)))
Data$delta = (Data$MO2 - predMO2)/predMO2 * 100 # residuals set to zero
# when below pivotDO
Data$delta[Data$DO < pivotDO | lethal] = 0
tol = 0 # any positive residual is unacceptable
HighValues = Data$delta > tol
Ranks = rank(-1 * Data$delta)
HighMO2 = HighValues & Ranks == min(Ranks) # keep largest residual
if (sum(HighValues) > 0)
{
nblethal = sum(lethal)
Data$W = NA
Data$W[lethal] = 1/nblethal
Data$W[HighMO2] = 1
# NOTE(review): `weight` relies on partial matching of lm's `weights`
# argument -- it works, but `weights` would be the exact name.
theMod = lm(MO2 ~ DO, weight = W, data = Data[lethal | HighMO2, ])
# This new regression is always an improvement, but there can still
# be points above the line, so we repeat
predMO2_2 = as.numeric(predict(theMod, data.frame(DO = Data$DO)))
Data$delta2 = (Data$MO2 - predMO2_2)/predMO2_2 * 100
Data$delta2[Data$DO < pivotDO] = 0
tol = Data$delta2[HighMO2]
HighValues2 = Data$delta2 > tol
if (sum(HighValues2) > 0)
{
Ranks2 = rank(-1 * Data$delta2)
HighMO2_2 = HighValues2 & Ranks2 == 1 # keep the largest residual
nblethal = sum(lethal)
Data$W = NA
Data$W[lethal] = 1/nblethal
Data$W[HighMO2_2] = 1
theMod2 = lm(MO2 ~ DO, weight = W, data = Data[lethal | HighMO2_2,
])
# is new slope steeper than the old one?
if (theMod2$coef[2] > theMod$coef[2]) {
theMod = theMod2
HighMO2 = HighMO2_2
}
} # end second search for high value
} # end first search for high value
Coef = coefficients(theMod)
# Step 5, check for positive intercept
AboveOrigin = Coef[1] > 0
# if it is, we use a regression that goes through the origin
if (AboveOrigin) {
theMod = lm(MO2 ~ DO - 1, data = Data[lethal, ])
Coef = c(0, coefficients(theMod)) # need to add the intercept (0)
# manually to have a pair of coefficients
method = "through_origin"
HighMO2 = rep(FALSE, nrow(Data)) # did not use the additional value
# from Step 4
}
# O2crit is where the fitted line reaches SMR
po2crit = as.numeric(round((SMR - Coef[1])/Coef[2], 1))
sum_mod = summary(theMod)
anov_mod = anova(theMod)
# The assignment below is the function's last expression, so O2CRIT is
# returned (invisibly, because an assignment's value is invisible).
O2CRIT = list(o2crit = po2crit, SMR = SMR, Nb_MO2_conforming = N_under_SMR, Nb_MO2_conf_used = final_N_under_SMR,
High_MO2_required = sum(HighMO2) == 1, origData = Data, Method = method,
mod = theMod, r2 = sum_mod$r.squared, P = anov_mod$"Pr(>F)", lethalPoints = which(lethal),
AddedPoints = which(HighMO2))
} # end function
plotO2crit: used to plot the models fitted by the calcO2crit function. Claireaux and Chabot (2016), DOI: 10.1111/jfb.12833
# Plot the MO2~DO data and the regressions used by calcO2crit().
#
# o2critobj: the list returned by calcO2crit().
# plotID:    optional label drawn in the top-left corner.
# Xlab/Ylab: axis labels; Ylab also accepts one of eight keyword codes that
#            expand to a formatted MO2 expression (dot/italic, umol or mg).
# Transparency: set FALSE for devices without alpha support (e.g. postscript).
# FIX: the default was `T`, which is a reassignable variable; TRUE is the
# literal and cannot be shadowed.
plotO2crit <- function(o2critobj, plotID = "", Xlab = "Dissolved oxygen (% sat.)",
Ylab = "dotitalumol", smr.cex = 0.9, o2crit.cex = 0.9, plotID.cex = 1.2, Transparency = TRUE,
...) {
# AUTHOR: Denis Chabot, Institut Maurice-Lamontagne, DFO, Canada first
# version written in June 2009 last updated 2015-02-09 for R plotting
# devices that do not support transparency (e.g., postscript), set
# Transparency to FALSE
smr = o2critobj$SMR
# Expand the Ylab keyword into a plotmath expression; any other string is
# used as the axis label verbatim.
if (Ylab %in% c("dotitalumol", "italumol", "dotumol", "umol", "dotitalmg", "italmg",
"dotmg", "mg")) {
switch(Ylab, dotitalumol = {
mo2.lab = expression(paste(italic(dot(M))[O[2]], " (", mu, "mol ", O[2],
" ", min^-1, " ", kg^-1, ")"))
}, italumol = {
mo2.lab = expression(paste(italic(M)[O[2]], " (", mu, "mol ", O[2], " ",
min^-1, " ", kg^-1, ")"))
}, dotumol = {
mo2.lab = expression(paste(dot(M)[O[2]], " (", mu, "mol ", O[2], " ",
min^-1, " ", kg^-1, ")"))
}, umol = {
mo2.lab = expression(paste(M[O[2]], " (", mu, "mol ", O[2], " ", min^-1,
" ", kg^-1, ")"))
}, dotitalmg = {
mo2.lab = expression(paste(italic(dot(M))[O[2]], " (mg ", O[2], " ",
h^-1, " ", kg^-1, ")"))
}, italmg = {
mo2.lab = expression(paste(italic(M)[O[2]], " (mg ", O[2], " ", h^-1,
" ", kg^-1, ")"))
}, dotmg = {
mo2.lab = expression(paste(dot(M)[O[2]], " (mg ", O[2], " ", h^-1, " ",
kg^-1, ")"))
}, mg = {
mo2.lab = expression(paste(M[O[2]], " (mg ", O[2], " ", h^-1, " ", kg^-1,
")"))
})
} else mo2.lab = Ylab
if (Transparency) {
Col = c(rgb(0, 0, 0, 0.7), "red", "orange")
} else {
Col = c(grey(0.3), "red", "orange")
}
Data = o2critobj$origData
lowestMO2 = quantile(Data$MO2[Data$DO >= 80], p = 0.05) # I added this
# Colour code: Col[1] regulating, Col[2] conforming (used in regression),
# Col[3] the extra high-residual point added at Step 4 of calcO2crit.
Data$Color = Col[1]
Data$Color[o2critobj$lethalPoints] = Col[2]
Data$Color[o2critobj$AddedPoints] = Col[3]
# ordinary LS regression without added points: blue line, red symbols
# ordinary LS regression with added points: blue line, red & orange symbols
# regression through origin: green dotted line, red symbols
line.color = ifelse(o2critobj$Method == "LS_reg", "blue", "darkgreen")
line.type = ifelse(o2critobj$Method == "LS_reg", 1, 3)
limX = c(0, max(Data$DO))
limY = c(0, max(Data$MO2))
plot(MO2 ~ DO, data = Data, xlim = limX, ylim = limY, col = Data$Color, xlab = Xlab,
ylab = mo2.lab, ...)
coord <- par("usr")
if (plotID != "") {
text(0, coord[4], plotID, cex = plotID.cex, adj = c(0, 1.2))
}
abline(h = lowestMO2, col = "pink") # I added this
abline(h = smr, col = "orange")
text(coord[1], smr, "SMR", adj = c(-0.1, 1.3), cex = smr.cex)
text(coord[1], smr, round(smr, 1), adj = c(-0.1, -0.3), cex = smr.cex)
# Draw the fitted line and drop a segment at O2crit (skipped when O2crit
# could not be estimated).
if (!is.na(o2critobj$o2crit)) {
abline(o2critobj$mod, col = line.color, lty = line.type)
segments(o2critobj$o2crit, smr, o2critobj$o2crit, coord[3], col = line.color,
lwd = 1)
text(x = o2critobj$o2crit, y = 0, o2critobj$o2crit, col = line.color, cex = o2crit.cex,
adj = c(-0.1, 0.5))
}
} # end of function
meta_files_wd: Directory for the metadata
wd <- getwd() # project root; all data/output directories hang off this
# BUG FIX: paste0(wd, "./meta-data") concatenated without a separator and
# produced "<wd>./meta-data" (a stray "." glued onto the parent directory
# name). file.path() inserts the platform separator correctly.
meta_files_wd <- file.path(wd, "meta-data") # directory holding the metadata
labchart_wd: Directory for Labchart estimated slopes
# BUG FIX: paste0(wd, "./lab-chart-slopes") produced "<wd>./lab-chart-slopes"
# (missing separator); file.path() builds the path correctly.
labchart_wd <- file.path(wd, "lab-chart-slopes")
mod_data_wd: Directory for model output data estimated slopes
# BUG FIX: paste0(wd, "./mod-data") produced "<wd>./mod-data" (missing
# separator); file.path() builds the path correctly.
mod_data_wd <- file.path(wd, "mod-data")
output_fig_wd: this is where we will put the figures
# BUG FIX: paste0(wd, "./output-fig") produced "<wd>./output-fig" (missing
# separator); file.path() builds the path correctly.
output_fig_wd <- file.path(wd, "output-fig")
# Scalar ifelse() is unusual style, but it is kept deliberately: its visible
# return value is what prints "Folder already exists" in the knitted output.
ifelse(!dir.exists("output-fig"), dir.create("output-fig"), "Folder already exists")
## [1] "Folder already exists"
output_mods_wd: this is where we will put the fitted models
# BUG FIX: paste0(wd, "./output-mod") produced "<wd>./output-mod" (missing
# separator); file.path() builds the path correctly.
output_mods_wd <- file.path(wd, "output-mod")
# Scalar ifelse() kept deliberately: its visible return value prints
# "Folder already exists" in the knitted output when the directory exists.
ifelse(!dir.exists("output-mod"), dir.create("output-mod"), "Folder already exists")
## [1] "Folder already exists"
slope_df: We have imported the slopes extracted in LabChart during each phase of the experiment
# Import the LabChart slope spreadsheets. Each Excel sheet holds the four
# respirometer groups (a-d) side by side, with per-chamber columns prefixed
# "a_", "b_", "c_", "d_". Each group is reshaped to long format (one row per
# chamber per measurement) and tagged with its group letter.
# NOTE(review): setwd() inside a script is fragile -- consider passing full
# paths to read_excel() instead.
setwd(labchart_wd)
#
# # Get the names of all sheets in the Excel file
sheet_names <- excel_sheets("labchart-all-dates_v2.xlsx")
# Shared (non-prefixed) trial columns carried along with every group.
all_trials_select <- c("start_date", "order", "phase", "cycle", "date", "time")
slope_list <- list()
for (sheet in sheet_names) {
df <- read_excel("labchart-all-dates_v2.xlsx", sheet = sheet) %>%
dplyr::rename_with(tolower)
# --- group a ---
a_name <- paste0("a_", tolower(sheet))
a_df <- df %>%
dplyr::select(starts_with('a'), all_trials_select) %>%
dplyr::rename(temp = a_temp) %>%
dplyr::mutate(across(starts_with('a'), as.numeric)) %>%
pivot_longer(
cols = starts_with('a'), # Select all columns to pivot
names_to = c("chamber_id", ".value"), # Separate column names into 'id' and other variables
names_sep = "_"
) %>%
dplyr::mutate(respirometer_group = "a") # Add a new column with a fixed value
slope_list[[a_name]]<- a_df
# --- group b ---
b_name <- paste0("b_", tolower(sheet))
b_df <- df %>%
dplyr::select(starts_with('b'), all_trials_select) %>%
dplyr::rename(temp = b_temp) %>%
dplyr::mutate(across(starts_with('b'), as.numeric)) %>%
pivot_longer(
cols = starts_with('b'), # Select all columns to pivot
names_to = c("chamber_id", ".value"), # Separate column names into 'id' and other variables
names_sep = "_"
) %>%
dplyr::mutate(respirometer_group = "b")
slope_list[[b_name]] <- b_df
# --- group c ---
# "cycle" itself starts with "c", so it is renamed to i_cycle first to keep
# it out of the starts_with('c') selection/pivot, then renamed back.
c_name <- paste0("c_", tolower(sheet))
c_df <- df %>%
dplyr::select(starts_with('c'), all_trials_select) %>%
dplyr::rename(temp = c_temp,
i_cycle = cycle) %>%
dplyr::mutate(across(starts_with('c'), as.numeric)) %>%
pivot_longer(
cols = starts_with('c'), # Select all columns to pivot
names_to = c("chamber_id", ".value"), # Separate column names into 'id' and other variables
names_sep = "_"
) %>%
dplyr::mutate(respirometer_group = "c") %>%
dplyr::rename(cycle = i_cycle)
slope_list[[c_name]] <- c_df
# --- group d ---
# Same trick as group c: "date" starts with "d", so it is shielded as
# i_date during the selection/pivot and renamed back afterwards.
d_name <- paste0("d_", tolower(sheet))
d_df <- df %>%
dplyr::select(starts_with('d'), all_trials_select) %>%
dplyr::rename(temp = d_temp,
i_date = date) %>%
dplyr::mutate(across(starts_with('d'), as.numeric)) %>%
pivot_longer(
cols = starts_with('d'), # Select all columns to pivot
names_to = c("chamber_id", ".value"), # Separate column names into 'id' and other variables
names_sep = "_"
) %>%
dplyr::mutate(respirometer_group = "d") %>%
dplyr::rename(date = i_date)
slope_list[[d_name]] <- d_df
}
# Stack all sheets/groups and derive keys: id_prox ("<group>_<start_date>_<n>")
# links each chamber to its row in the metadata; time_hms converts the
# fractional-hour time column (hours * 3600 = seconds) to an hms time.
slope_df <- bind_rows(slope_list) %>%
dplyr::mutate(resp_cat_date = paste0(respirometer_group, "_", start_date),
chamber_n = str_extract(chamber_id, "\\d+"),
id_prox = paste0(resp_cat_date, "_", chamber_n),
time_hms = as_hms(time*3600),
date_chr = format(date, "%d/%m/%Y")
)
metadata: This is the meta data for each chamber
Note: We are also adding volume based on chamber type.
# Read the per-chamber metadata (morphometrics etc.), split the id string
# "group_salinity_startdate_chamber" into its components, and assign each
# chamber its volume (litres -- TODO confirm units) from the chamber type.
# NOTE(review): `TRUE ~ NA` in case_when() requires dplyr >= 1.1.0 (earlier
# versions need NA_real_). setwd() is fragile; a full path would be safer.
setwd(meta_files_wd)
metadata <- read_excel("Morpho.xlsx", na = "NA") %>%
dplyr::mutate(id_split = id) %>%
tidyr::separate(id_split, into = c("respirometer_group", "salinity_group", "start_date",
"chamber"), sep = "_") %>%
dplyr::mutate(volume = dplyr::case_when(chamber_type == "L" ~ 0.3, chamber_type ==
"M_M" ~ 0.105, chamber_type == "M_NM" ~ 0.11, chamber_type == "S" ~ 0.058,
chamber_type == "SM" ~ 0.075, chamber_type == "D3" ~ 0.055, TRUE ~ NA), id_prox = paste0(respirometer_group,
"_", start_date, "_", chamber))
Adding the meta data to LabChart slopes
# Join the chamber metadata onto the LabChart slopes (by id_prox) and flag
# each measurement as taken in the light (07:00-19:00) or dark photoperiod.
# start_date / respirometer_group are dropped first so the join does not
# duplicate them.
slope_df_2 <- slope_df %>%
  dplyr::select(-start_date, -respirometer_group) %>%
  left_join(metadata, by = "id_prox") %>%
  # CONSISTENCY FIX: as.hms() is the deprecated (later removed) spelling in
  # the hms package; as_hms() is the current API and is already used above
  # when time_hms is created.
  dplyr::mutate(light_dark = if_else(time_hms >= as_hms("07:00:00") & time_hms <
    as_hms("19:00:00"), "light", "dark")) %>%
  dplyr::arrange(id)
We have 64 fish with MO2 data
# Count the fish: unique ids among chambers flagged as containing a fish.
n <- slope_df_2 %>%
  dplyr::filter(chamber_condition == "fish") %>%
  dplyr::pull(id) %>%
  dplyr::n_distinct()
paste0("n = ", n)
## [1] "n = 64"
With 48 from the 0 ppt and 48 from 9 ppt groups
# Tabulate the number of unique ids in each salinity group.
slope_df_2 %>%
  dplyr::summarise(`n total` = dplyr::n_distinct(id), .by = salinity_group) %>%
  dplyr::arrange(salinity_group) %>%
  gt() %>%
  cols_label(salinity_group = "Salinity group") %>%
  cols_align(align = "center", columns = everything())
| Salinity group | n total |
|---|---|
| 0 | 48 |
| 9 | 48 |
Here we calculate the mean mass and length of the fish used in the experiment.
# Summarise fish size: keep one row per id, then compute mean/min/max of mass
# and length across fish.
# NOTE(review): sample_n(1) picks a *random* row per id (RNG-dependent); this
# presumably works because mass and length are constant within an id --
# confirm, and consider distinct(id, .keep_all = TRUE) for determinism.
mass_length <- slope_df_2 %>%
dplyr::group_by(id) %>%
dplyr::sample_n(1) %>%
dplyr::ungroup() %>%
dplyr::reframe(x_mass = round(mean(mass, na.rm = TRUE), 3), min_mass = round(min(mass,
na.rm = TRUE), 3), max_mass = round(max(mass, na.rm = TRUE), 3), x_length = round(mean(length,
na.rm = TRUE), 2), min_length = round(min(length, na.rm = TRUE), 2), max_length = round(max(length,
na.rm = TRUE), 2))
# Pull each summary out as a scalar for inline reporting below.
mass_mean <- mass_length %>%
pull(x_mass)
mass_min <- mass_length %>%
pull(min_mass)
mass_max <- mass_length %>%
pull(max_mass)
length_mean <- mass_length %>%
pull(x_length)
length_min <- mass_length %>%
pull(min_length)
length_max <- mass_length %>%
pull(max_length)
paste0("The mean mass of fish was ", mass_mean, " g (range: ", mass_min, "–", mass_max,
")", ", and the mean length was ", length_mean, " mm (range: ", length_min, "–",
length_max, ")")
## [1] "The mean mass of fish was 0.532 g (range: 0.21–1.6), and the mean length was 50.41 mm (range: 40–70)"
We will remove 6 trials which had errors. These are as follows:
# Ids of the six trials excluded because of experimental errors.
remove_trial_error <- c("a_0_25nov_3", "b_0_26nov_4", "c_0_22nov_2", "c_9_26nov_2",
  "c_9_26nov_4", "d_9_27nov_3")
# Drop every row belonging to one of those trials.
slope_df_filter <- slope_df_2 %>%
  dplyr::filter(!id %in% remove_trial_error)
We now have 58 fish with MO2 data
# Recount the fish after removing the error trials.
n <- slope_df_filter %>%
  dplyr::filter(chamber_condition == "fish") %>%
  dplyr::pull(id) %>%
  dplyr::n_distinct()
paste0("n = ", n)
## [1] "n = 58"
With 45 in the 0 ppt group and 45 in the 9 ppt group
# Tabulate the remaining unique ids per salinity group.
slope_df_filter %>%
  dplyr::summarise(`n total` = dplyr::n_distinct(id), .by = salinity_group) %>%
  dplyr::arrange(salinity_group) %>%
  gt() %>%
  cols_label(salinity_group = "Salinity group") %>%
  cols_align(align = "center", columns = everything())
| Salinity group | n total |
|---|---|
| 0 | 45 |
| 9 | 45 |
Here we apply the following filters to the MO2 data:
# Burn-in cycles discarded at the start of each trial.
cycle_burn <- 0:4
# Keep only: post-burn-in cycles, negative slopes (oxygen being consumed),
# slopes fitted on n > 60 points (presumably samples -- confirm), and
# chambers that actually contained a fish.
slope_df_filter_1 <- slope_df_filter %>%
  dplyr::filter(
    !(cycle %in% cycle_burn),
    mo2corr < 0,
    n > 60,
    chamber_condition == "fish"
  )
# Now we remove the points after the chamber is opened This is a function to do
# so
# Truncate a single trial's trace at the moment the chamber was opened.
#
# Opening the chamber shows up as a sudden rise in dissolved oxygen, so the
# trace is cut just before the first sample where o2 rises by more than 1
# unit relative to the previous sample.
#
# @param group data frame for one trial, ordered in time, with a numeric
#   `o2` column.
# @return the input rows up to (excluding) the first o2 jump, with an added
#   `o2_diff` column (first element NA); the caller drops it afterwards.
filter_o2_increase <- function(group) {
  # base diff() is equivalent to o2 - dplyr::lag(o2); the first reading has
  # no predecessor, hence the leading NA. Guard the 0-row case, where
  # c(NA, numeric(0)) would have length 1 and break the column assignment.
  if (nrow(group) > 0) {
    group$o2_diff <- c(NA_real_, diff(group$o2))
  } else {
    group$o2_diff <- numeric(0)
  }
  # First index where the jump exceeds 1; which() skips the leading NA, so
  # when a cutoff exists it is always >= 2.
  cutoff_index <- which(group$o2_diff > 1)[1]
  if (!is.na(cutoff_index)) {
    # seq_len() is safe even for cutoff_index == 1, unlike
    # 1:(cutoff_index - 1), which would yield c(1, 0).
    group <- group[seq_len(cutoff_index - 1), , drop = FALSE]
  }
  return(group)
}
# Apply filter_o2_increase() to each trial (grouped by fish id) for the
# non-SMR phases, then drop the helper o2_diff column.
slope_tidy_closed <- slope_df_filter_1 %>%
dplyr::filter(phase != "smr") %>%
group_by(id) %>%
group_split() %>%
lapply(filter_o2_increase) %>%
bind_rows() %>%
select(-o2_diff)
# SMR-phase rows bypass the open-chamber cutoff filter.
slope_tidy_smr <- slope_df_filter_1 %>%
dplyr::filter(phase == "smr")
# Recombine and restore chronological order within each fish.
slope_df_filter_2 <- rbind(slope_tidy_smr, slope_tidy_closed) %>%
dplyr::arrange(id, order)
We have estimated SMR with two different appraches.
First using the mean of the lowest 3 values (smr_3l_means)
# SMR estimate 1: per fish, the mean of the three lowest MO2 values during
# the SMR phase.
# NOTE: mo2corr is negative here (slope_df_filter_1 kept only mo2corr < 0),
# so sorting in *descending* order puts the values closest to zero -- i.e.
# the smallest |MO2| -- first, and slice_head(n = 3) takes the three lowest
# metabolic rates. abs() is applied later when SMR is computed.
smr_3l_means <- slope_df_filter_2 %>%
dplyr::group_by(id) %>%
dplyr::filter(phase == "smr") %>%
dplyr::arrange(desc(mo2corr)) %>%
dplyr::slice_head(n = 3) %>% # Select the three lowest |MO2| per fish (see note)
dplyr::ungroup() %>%
dplyr::group_by(id) %>%
dplyr::reframe(smr_l3 = mean(mo2corr))
# Combine the processed "smr" phase with all other phases
slope_df_filter_3 <- slope_df_filter_2 %>%
dplyr::left_join(., smr_3l_means, by = "id")
Second, using the calcSMR function by Chabot, Steffensen and
Farrell (2016) DOI: 10.1111/jfb.12845. Specifically, we use the mean of the
lowest normal distribution (MLND) where CVmlnd < 5.4, and the mean of
the lower 20% quantile (q0.2) where CVmlnd > 5.4. If CVmlnd is not
calculated we have used q0.2.
# SMR estimate 2: per fish, run calcSMR() on the absolute SMR-phase slopes
# and keep the MLND when CVmlnd < 5.4, otherwise the 0.2 quantile.
labchart_chabot_smr <- slope_df_filter_3 %>%
  dplyr::filter(phase == "smr")
# Extract distinct IDs
ids <- labchart_chabot_smr %>%
  dplyr::distinct(id) %>%
  dplyr::pull()
# Initialise an empty list to store per-fish SMR tibbles
smr_list <- list()
for (id_i in ids) {
  # BUG FIX: the original assigned smr_df inside the tryCatch *error handler*,
  # which only modified the handler's local scope. On an error, the outer
  # smr_df still held the previous iteration's tibble, so the wrong row was
  # stored (and the first iteration would fail with "object not found").
  # tryCatch now RETURNS the tibble from both the body and the handler.
  smr_df <- tryCatch({
    # Data for this fish; calcSMR expects positive MO2 values.
    df_i <- labchart_chabot_smr %>%
      dplyr::filter(id == id_i) %>%
      dplyr::mutate(abs_mo2corr = abs(mo2corr))
    calcSMR_results <- calcSMR(df_i$abs_mo2corr)
    CVmlnd_i <- calcSMR_results$CVmlnd
    quant_i <- calcSMR_results$quant %>%
      as_tibble()
    quant_20per_i <- quant_i$value[3] # third requested quantile = q0.2
    mlnd_i <- calcSMR_results$mlnd
    # MLND if the class is tight enough, otherwise q0.2; fall back to q0.2
    # when CVmlnd is NA (so the comparison above returned NA).
    smr_value <- if_else(CVmlnd_i < 5.4, mlnd_i, quant_20per_i)
    smr_type <- if_else(CVmlnd_i < 5.4, "mlnd", "quant_20per")
    smr_value <- if_else(is.na(smr_value), quant_20per_i, smr_value)
    smr_type <- if_else(is.na(smr_type), "quant_20per", smr_type)
    tibble(id = id_i, smr = smr_value, smr_est = smr_type)
  }, error = function(e) {
    # On failure (e.g. Mclust error), record NA values for this fish.
    tibble(id = id_i, smr = NA, smr_est = NA)
  })
  smr_list[[id_i]] <- smr_df
}
# Combine all individual SMR data frames into one
smr_df <- bind_rows(smr_list) %>%
  dplyr::rename(smr_chabot = smr, smr_chabot_method = smr_est)
slope_df_filter_4 <- slope_df_filter_3 %>%
  dplyr::left_join(., smr_df, by = "id")
Here we are transforming the MO2 units. The resulting values are as follows:
# Combine back into one data frame and convert units:
# - DO: mg/L -> % air saturation; o2_kpa: mg/L -> kPa (respirometry::conv_o2,
#   using measured temperature and salinity at 1013.25 hPa).
# - net_volume: chamber volume minus fish volume (mass in g / 1000;
#   presumably assumes fish density ~ 1 g/mL -- confirm).
# - MO2 etc.: |slope| * net volume * 3600, i.e. a per-second slope scaled to
#   a per-hour rate (units per comments on the later plots: mg O2/h); the
#   *_g variants are mass-specific.
slope_tidy <- slope_df_filter_4 %>%
dplyr::mutate(DO = conv_o2(
o2 = o2,
from = "mg_per_l",
to = "percent_a.s.",
temp = temp, #C
sal = measured_salinity,
atm_pres = 1013.25),
o2_kpa = conv_o2(
o2 = o2,
from = "mg_per_l",
to = "kPa",
temp = temp, #C
sal = measured_salinity,
atm_pres = 1013.25),
net_volume = volume - mass/1000,
MO2 = abs(mo2corr)*net_volume*60*60,
MO2_g = MO2/mass,
SMR = abs(smr_l3)*net_volume*60*60,
SMR_g = SMR/mass,
SMR_CHABOT = abs(smr_chabot)*net_volume*60*60,
SMR_CHABOT_g = SMR_CHABOT/mass
)
Here we plot all oxygen consumption (MO2; mg O2/g/h) by dissolved oxygen percentage (DO) for all fish, including all SMR estimates.
# MO2 (mg O2/g/h) vs dissolved oxygen for all fish, including SMR-phase values
slope_tidy %>%
  ggplot(aes(y = MO2_g, x = DO, colour = id)) + # Default aesthetics
  geom_point(show.legend = FALSE) +
  # Per-fish linear fits (semi-transparent black)
  geom_smooth(aes(group = id), method = "lm", se = FALSE, colour = scales::alpha("black", 0.5)) +
  # Overall linear fit
  geom_smooth(method = "lm", se = TRUE, colour = "red") +
  # Overall non-parametric (default loess/gam) fit, dashed
  geom_smooth(se = TRUE, colour = "red", linetype = "dashed") +
  theme_clean() +
  labs(
    subtitle = "All values",
    x = "Dissolved oxygen percentage (DO)",
    # FIX: consistent mass-specific units (mg O2/g/h), as stated in the text
    y = "MO2 (mg O2/g/h)"
  )
Same plot but without SMR values.
# Same plot, but with the SMR-phase measurements removed
slope_tidy %>%
  dplyr::filter(phase != "smr") %>%
  ggplot(aes(y = MO2_g, x = DO, colour = id)) + # Default aesthetics
  geom_point(show.legend = FALSE) +
  # Per-fish linear fits (semi-transparent black)
  geom_smooth(aes(group = id), method = "lm", se = FALSE, colour = scales::alpha("black", 0.5)) +
  # Overall linear fit
  geom_smooth(method = "lm", se = TRUE, colour = "red") +
  # Overall non-parametric fit, dashed
  geom_smooth(se = TRUE, colour = "red", linetype = "dashed") +
  theme_clean() +
  labs(
    subtitle = "Only closed periods",
    x = "Dissolved oxygen percentage (DO)",
    # FIX: consistent mass-specific units (mg O2/g/h)
    y = "MO2 (mg O2/g/h)"
  )
Looking at the different responses in the two salinity groups.
The response appears more variable in freshwater.
# MO2 vs DO faceted by salinity treatment
slope_tidy %>%
  ggplot(aes(y = MO2_g, x = DO, colour = id)) + # Default aesthetics
  geom_point(show.legend = FALSE) +
  # Per-fish linear fits (semi-transparent black)
  geom_smooth(aes(group = id), method = "lm", se = FALSE, colour = scales::alpha("black", 0.5)) +
  # Overall linear fit
  geom_smooth(method = "lm", se = TRUE, colour = "red") +
  # Overall non-parametric fit, dashed
  geom_smooth(se = TRUE, colour = "red", linetype = "dashed") +
  theme_clean() +
  facet_wrap(~salinity_group) +
  labs(
    subtitle = "mo2 vs o2 by salinity treatment",
    x = "Dissolved oxygen percentage (DO)",
    # FIX: consistent mass-specific units (mg O2/g/h)
    y = "MO2 (mg O2/g/h)"
  )
Looking at the different chamber types
# MO2 vs DO faceted by respirometry chamber type (free axes per facet)
slope_tidy %>%
  ggplot(aes(y = MO2_g, x = DO, colour = id)) + # Default aesthetics
  geom_point(show.legend = FALSE) +
  # Per-fish linear fits (semi-transparent black)
  geom_smooth(aes(group = id), method = "lm", se = FALSE, colour = scales::alpha("black", 0.5)) +
  # Overall linear fit
  geom_smooth(method = "lm", se = TRUE, colour = "red") +
  # Overall non-parametric fit, dashed
  geom_smooth(se = TRUE, colour = "red", linetype = "dashed") +
  theme_clean() +
  # FIX: the argument is `scales`; the original `scale =` only worked via
  # partial argument matching
  facet_wrap(~chamber_type, scales = "free") +
  labs(
    subtitle = "mo2 vs o2 by chamber type",
    x = "Dissolved oxygen percentage (DO)",
    # FIX: consistent mass-specific units (mg O2/g/h)
    y = "MO2 (mg O2/g/h)"
  )
Comparison to data from Urbina et al. (2012)
# Number of individual fish in the data set (used in the plot annotation)
n <- slope_tidy %>%
  dplyr::distinct(id) %>%
  nrow(.)
min_o2_kpa <- min(slope_tidy$o2_kpa, na.rm = TRUE)
max_o2_kpa <- max(slope_tidy$o2_kpa, na.rm = TRUE)
# Bin PO2 into 12 equal-width groups
slope_tidy <- slope_tidy %>%
  mutate(o2_group = cut(o2_kpa,
                        # FIX: 13 breakpoints give 12 intervals (the original
                        # comment said "11 intervals, so 12 breakpoints")
                        breaks = seq(min_o2_kpa, max_o2_kpa, length.out = 13),
                        labels = paste0("Group ", 1:12),
                        include.lowest = TRUE))
# Per-bin means and SDs; the factor 31.25 converts mg O2 to umol O2
# (1 mg O2 = 31.25 umol) to match the units in Urbina et al. (2012)
time_bin_df <- slope_tidy %>%
  dplyr::group_by(o2_group) %>%
  dplyr::summarise(mean_MO2_g = mean(MO2_g)*31.25,
                   mean_o2_kpa = mean(o2_kpa),
                   # BUG FIX: n is a count of observations and must not be
                   # unit-converted; the original multiplied it by 31.25
                   n = length(MO2_g),
                   MO2_g_sd = sd(MO2_g)*31.25,
                   o2_kpa_sd = sd(o2_kpa)) %>%
  dplyr::ungroup()
# Binned means (black points +/- SD error bars) over the raw per-slope
# values (grey), in umol O2/g/h for comparison with Urbina et al. (2012).
time_bin_df %>%
ggplot(aes(y = mean_MO2_g, x = mean_o2_kpa)) +
# Add raw data points
# (raw MO2_g is in mg O2/g/h; x 31.25 converts to umol O2/g/h)
geom_point(data = slope_tidy, aes(y = MO2_g*31.25, x = o2_kpa),
size = 2, color = "grey", alpha = 0.5) + # Raw data points
# Add summary points
geom_point(size = 3, colour = "black", show.legend = FALSE) +
# Add vertical error bars
geom_errorbar(aes(ymin = mean_MO2_g - MO2_g_sd, ymax = mean_MO2_g + MO2_g_sd),
width = 0.15, colour = "black") +
# Add horizontal error bars
geom_errorbarh(aes(xmin = mean_o2_kpa - o2_kpa_sd, xmax = mean_o2_kpa + o2_kpa_sd),
height = 0.4, colour = "black") +
# Sample size (number of fish) in the top-left corner
annotate("text", x = 0,
y = 16,
label = paste0("n = ", n),
hjust = 0, vjust = 1, size = 4) +
theme_clean() +
labs(
subtitle = "",
x = "PO2 (kPa)",
y = "MO2 (umol O2 g/h)"
) +
scale_y_continuous(limits = c(0, 16), breaks = seq(0, 16, by = 2)) +
scale_x_continuous(limits = c(0, 22), breaks = seq(0, 22, by = 2)) # Custom y-axis scale
Making an SMR only data frame
# Subset to the SMR-phase measurements only
slope_tidy_smr <- dplyr::filter(slope_tidy, phase == "smr")
Plot of SMR by salinity treatment. The small points are the observed values, the shaded area behind the points is a kernel density of the observed data, and the box plot on top shows the median and interquartile range (IQR).
# Observed mean MO2 per salinity group for the SMR phase.
# summarise() (one row per group) is the idiom here; reframe() is for
# results with arbitrary numbers of rows.
mean_mo2_salinity <- slope_tidy_smr %>%
  dplyr::group_by(salinity_group) %>%
  dplyr::summarise(mean_mo2 = mean(MO2, na.rm = TRUE)) %>%
  dplyr::ungroup()
fig_i <- ggplot() +
  # Kernel density of the observed data
  geom_violin(data = slope_tidy_smr, aes(x = salinity_group, y = MO2, fill = salinity_group), color = NA, alpha = 0.3) +
  # Observed values
  geom_jitter(data = slope_tidy_smr, aes(x = salinity_group, y = MO2, fill = salinity_group),
              shape = 21, size = 2, color = "black", alpha = 0.2) +
  # Median and IQR
  geom_boxplot(data = slope_tidy_smr, aes(x = salinity_group, y = MO2, fill = salinity_group),
               size = 1, alpha = 0.5, outlier.shape = NA, width = 0.3) +
  # Group means
  geom_point(data = mean_mo2_salinity,
             aes(x = salinity_group, y = mean_mo2, fill = salinity_group),
             size = 3, alpha = 0.8, colour = "black", stroke = 2) +
  scale_fill_manual(values = c("#4B5320", "#000080")) + # Custom fill colours
  scale_colour_manual(values = c("#4B5320", "#000080")) +
  theme_clean() +
  theme(legend.position = "none") +
  labs(
    subtitle = "",
    x = "Salinity group (ppt)",
    # FIX: y maps whole-animal MO2 (mg O2/h); the previous label
    # ("mg O2 g/h") implied a mass-specific rate
    y = "MO2 (mg O2/h)"
  )
fig_i
Plotting MO2 estimates for each fish. The dashed red line is Chabot
SMR methods, and the solid line is the mean of the lowest 3 measures
(excluding the first 5 cycles)
# Create output directory if needed
output_fig_slopes_wd <- file.path(output_fig_wd, "slopes")
if (!dir.exists(output_fig_slopes_wd)) {
dir.create(output_fig_slopes_wd)
}
# All fish IDs, as a list to iterate over
ids <- slope_tidy %>%
dplyr::distinct(id) %>%
pull(id) %>%
as.list()
MO2_plot_list <- list()
# 1) Open the PDF device once
# (every per-fish plot becomes one page of the combined PDF)
pdf(file = file.path(output_fig_slopes_wd, "combined_slopes.pdf"), width = 8, height = 6)
# 2) Loop over IDs and create each plot
for (id_i in ids) {
# Per-fish SMR estimates are constant within a fish, so take the first row
smr_chabot <- slope_tidy %>%
dplyr::filter(id == id_i) %>%
dplyr::slice(1) %>%
dplyr::pull(SMR_CHABOT)
smr_l3 <- slope_tidy %>%
dplyr::filter(id == id_i) %>%
dplyr::slice(1) %>%
dplyr::pull(SMR)
# Dashed line = Chabot SMR; solid line = mean of the lowest 3 measures
plot <- slope_tidy %>%
dplyr::filter(id == id_i) %>%
ggplot(aes(x = o2, y = MO2)) + geom_hline(yintercept = smr_chabot, linetype = "dashed",
color = "darkred") + geom_hline(yintercept = smr_l3, color = "darkred") +
geom_point(aes(colour = phase)) + theme_clean() + labs(subtitle = paste0(id_i,
" slopes"), x = "Mean o2 (mg_per_l)", y = "abs(mo2) (mg_per_l)")
# Instead of saving each plot separately, just print it
# (print() while the pdf device is open writes the page to the PDF)
print(plot)
MO2_plot_list[[id_i]] <- plot
}
# 3) Close the PDF device *after* the loop
dev.off()
## png
## 2
# Re-print every per-fish MO2 plot so they also appear in the knitted document
purrr::walk(MO2_plot_list, print)
Here we scale our predictors for the model
# Mean-centre the continuous predictors (suffix *_z) and code the
# light/dark cycle as a 0/1 indicator.
center_list <- c("temp", "order", "mass")
slope_tidy_smr <- slope_tidy_smr %>%
  # as.numeric() drops the 1-column matrix that scale() returns; without it
  # each *_z variable is stored as a matrix column in the data frame, which
  # can trip up downstream modelling/plotting code.
  dplyr::mutate(across(all_of(center_list), ~as.numeric(scale(.x, center = TRUE, scale = FALSE)),
    .names = "{.col}_z"), light_dark_c = if_else(light_dark == "light", 1, 0))
Here we will use a Bayesian Generalised Linear Mixed Model (GLMM)
with a Gamma distribution and a log link, where the shape parameter (K)
is also modelled as a function of predictors. This models MO2 by
salinity during the SMR phase to see if the fish held at different
salinities have different SMRs. We have also added a few scaled
predictors, that may help describe variation in the data, such as mass
(g; 0.21–1.6) temperature (°C; 13.841–14.277), measurement order (1–28),
and light/dark cycle (light or dark; light between 07:00:00 and
19:00:00), we also include a random effect for fish id to account for
multiple MO2 measures on each fish. We allowed the the shape parameter
(K) to vary as a function of some of the predictors
(e.g. salinity_group, order_z) to improve fit.
# Gamma GLMM (log link) for SMR-phase MO2: fixed effects for centred
# temperature, measurement order, light/dark, centred mass and salinity
# group; random intercept per fish (id). The Gamma shape parameter is
# itself modelled on salinity group and order (distributional regression).
smr_gamma_bf <- bf(MO2 ~ temp_z + order_z + light_dark_c + mass_z + salinity_group +
(1 | id), shape ~ salinity_group + order_z, family = Gamma(link = "log"))
These are the default priors. We will use these.
suppressWarnings(get_prior(smr_gamma_bf, data = slope_tidy_smr, family = Gamma(link = "log")))
## prior class coef group resp dpar nlpar lb ub
## (flat) b
## (flat) b light_dark_c
## (flat) b mass_z
## (flat) b order_z
## (flat) b salinity_group9
## (flat) b temp_z
## student_t(3, -2.7, 2.5) Intercept
## student_t(3, 0, 2.5) sd 0
## student_t(3, 0, 2.5) sd id 0
## student_t(3, 0, 2.5) sd Intercept id 0
## (flat) b shape
## (flat) b order_z shape
## (flat) b salinity_group9 shape
## student_t(3, 0, 2.5) Intercept shape
## source
## default
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## (vectorized)
## default
## default
## (vectorized)
## (vectorized)
## default
## (vectorized)
## (vectorized)
## default
Here we run the model. I have hashed this out because I have saved the model for quick reloading.
# setwd(output_mods_wd) smr_mod_gamma <- brm(smr_gamma, data = slope_tidy_smr,
# cores = 4, chains = 4, warmup = 1000, seed = 143019, thin = 2, iter = 8000,
# save_pars = save_pars(all=TRUE), sample_prior = TRUE, file = 'smr_mod_gamma')
# print('Model complete')
Here we reload the model
# Load the previously fitted brms model directly by path; this avoids
# setwd(), which silently changes the working directory for every
# subsequent chunk.
smr_mod_gamma <- readRDS(file = file.path(output_mods_wd, "smr_mod_gamma.rds"))
Checking model convergence
plot(smr_mod_gamma, ask = F)
Checking rhat are equal to one
# Tabulate the Rhat convergence diagnostics (should all be ~1.00).
# broom::tidy() on a bare numeric vector is deprecated; enframe() builds
# the same two-column tibble (parameter name + Rhat) directly.
tibble::enframe(rhat(smr_mod_gamma), name = "names", value = "rhat")
## # A tibble: 133 × 2
## names rhat
## <chr> <dbl>
## 1 b_Intercept 1.00
## 2 b_shape_Intercept 1.00
## 3 b_temp_z 1.00
## 4 b_order_z 1.00
## 5 b_light_dark_c 1.00
## 6 b_mass_z 1.00
## 7 b_salinity_group9 1.00
## 8 b_shape_salinity_group9 1.00
## 9 b_shape_order_z 1.00
## 10 sd_id__Intercept 1.00
## # ℹ 123 more rows
Using the leave-one-out (loo) measure of fit, the model appears to
perform well; all Pareto k estimates are good (k < 0.7)
loo(smr_mod_gamma)
##
## Computed from 14000 by 895 log-likelihood matrix.
##
## Estimate SE
## elpd_loo 2256.5 38.6
## p_loo 71.5 7.0
## looic -4513.0 77.2
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.0]).
##
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
Model predictions generally align with the observed data
# Posterior predictive check: simulated densities overlaid on the observed
plot <- smr_mod_gamma %>%
  pp_check(type = "dens_overlay")
plot
We did not see a meaningful difference between the SMR measurements of fish from the two salinity treatments.
Table 1: Fixed effect estimates (β) and 95% Credible Intervals (95% CI) from the Bayesian Gamma GLMM of SMR-phase MO2.
# Fixed-effect posterior means with 95% credible intervals, rounded and
# formatted as "[lower, upper]" strings for the gt table.
model_est <- fixef(smr_mod_gamma, probs = c(0.025, 0.975)) %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "Predictor") %>%
  dplyr::mutate(
    β = round(Estimate, 3),
    Q2.5 = round(Q2.5, 3),
    Q97.5 = round(Q97.5, 3),
    `95% CI` = paste0("[", Q2.5, ", ", Q97.5, "]")
  )
# Render the predictor, estimate, and interval columns
model_est %>%
  dplyr::select(Predictor, "β", "95% CI") %>%
  gt()
| Predictor | β | 95% CI |
|---|---|---|
| Intercept | -2.615 | [-2.739, -2.494] |
| shape_Intercept | 2.568 | [2.43, 2.7] |
| temp_z | -0.344 | [-0.768, 0.076] |
| order_z | 0.001 | [-0.002, 0.005] |
| light_dark_c | 0.128 | [0.08, 0.174] |
| mass_z | 1.237 | [0.908, 1.566] |
| salinity_group9 | -0.096 | [-0.272, 0.082] |
| shape_salinity_group9 | 0.179 | [-0.02, 0.379] |
| shape_order_z | -0.042 | [-0.061, -0.024] |
Looking at the marginal mean difference between salinity groups
# Posterior marginal means per salinity group (on the model's log scale)
em_results <- emmeans(smr_mod_gamma, ~salinity_group)
# Pairwise difference between the two groups (still on the log scale)
contrast_results <- contrast(em_results, method = "pairwise")
em_results_df <- em_results %>%
tidy() %>%
# exp() back-transforms every numeric column from the log link to the
# response scale (estimate and HPD limits alike)
mutate(across(where(is.numeric), ~exp(.)))
contrast_results_df <- contrast_results %>%
tidy() %>%
# exp() of a log-scale difference gives the ratio of group means
mutate(across(where(is.numeric), ~exp(.)))
em_results_df %>%
gt()
| salinity_group | estimate | lower.HPD | upper.HPD |
|---|---|---|---|
| 0 | 0.07805491 | 0.06933489 | 0.08823359 |
| 9 | 0.07092322 | 0.06228137 | 0.08069857 |
# Posterior draws of the marginal means, back-transformed from the log link
emmeans_draws <- smr_mod_gamma %>%
  emmeans(~salinity_group) %>%
  gather_emmeans_draws() %>%
  dplyr::mutate(.value = exp(.value), salinity_group = as.character(salinity_group))
# Draws of the pairwise contrast; exp() of a log-scale difference is a
# ratio of group means
emmeans_contrast_draws <- smr_mod_gamma %>%
  emmeans(~salinity_group) %>%
  contrast(method = "pairwise") %>%
  gather_emmeans_draws() %>%
  dplyr::mutate(.value = exp(.value))
# FIX: use <- for assignment (the original used `=` at top level)
round_vars <- c(".value", ".lower", ".upper")
# Posterior means with 88.9% and 94.9% quantile intervals, rounded
smr_emmeans <- emmeans_draws %>%
  mean_qi(.width = c(0.889, 0.949)) %>%
  mutate(across(all_of(round_vars), ~round(.x, digits = 3)))
smr_contrast <- emmeans_contrast_draws %>%
  mean_qi(.width = c(0.889, 0.949)) %>%
  mutate(across(all_of(round_vars), ~round(.x, digits = 3)))
smr_emmeans
## # A tibble: 4 × 7
## salinity_group .value .lower .upper .width .point .interval
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 0 0.078 0.071 0.086 0.889 mean qi
## 2 9 0.071 0.064 0.079 0.889 mean qi
## 3 0 0.078 0.069 0.088 0.949 mean qi
## 4 9 0.071 0.062 0.081 0.949 mean qi
smr_contrast
## # A tibble: 2 × 7
## contrast .value .lower .upper .width .point .interval
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 salinity_group0 - salinity_group9 1.10 0.953 1.27 0.889 mean qi
## 2 salinity_group0 - salinity_group9 1.10 0.922 1.31 0.949 mean qi
Figure 1: The MO2 (mg O2/h) during SMR measurements plotted by salinity treatment. The small transparent points are the observed values, the shaded area behind the points is a kernel density of the observed data, the large coloured point (to the right) is the observed mean, and the large grey point with error bars (to the left) is the model-estimated marginal mean (emmean) with 95% Credible Intervals (95% CI).
# Observed group means (recomputed here so this chunk is self-contained).
# summarise() is the idiom for one-row-per-group results (was reframe()).
mean_mo2_salinity <- slope_tidy_smr %>%
  dplyr::group_by(salinity_group) %>%
  dplyr::summarise(mean_mo2 = mean(MO2, na.rm = TRUE)) %>%
  dplyr::ungroup()
fig_1 <- ggplot() +
  # Kernel density of the observed data
  geom_violin(data = slope_tidy_smr,
              aes(x = salinity_group, y = MO2, fill = salinity_group),
              color = NA, alpha = 0.2) +
  # Observed values
  geom_jitter(data = slope_tidy_smr,
              aes(x = salinity_group, y = MO2, fill = salinity_group),
              shape = 21, width = 0.3, size = 1, color = "black", alpha = 0.1) +
  # Observed mean, nudged right
  geom_point(data = mean_mo2_salinity,
             aes(x = salinity_group, y = mean_mo2, fill = salinity_group),
             size = 4, alpha = 1, stroke = 2, color = "black", shape = 21,
             position = position_nudge(x = 0.05)) +
  # Model-estimated marginal mean with 95% CI, nudged left
  stat_pointinterval(data = emmeans_draws,
                     aes(x = salinity_group, y = .value),
                     color = "black", fill = "grey", point_interval = "mean_qi", .width = 0.95, shape = 21, stroke = 2, point_size = 4, alpha = 1,
                     position = position_nudge(x = -0.05)) +
  scale_fill_manual(values = c("#4B5320", "#000080")) + # Custom fill colours
  scale_colour_manual(values = c("#4B5320", "#000080")) +
  theme_clean() +
  theme(legend.position = "none") +
  labs(
    subtitle = "",
    x = "Salinity group (ppt)",
    # FIX: y maps whole-animal MO2 (mg O2/h); the previous label
    # ("mg O2 g/h") implied a mass-specific rate
    y = "MO2 (mg O2/h)"
  )
fig_1
Here we are following the methods Urbina et al. (2012) with an
incremental regression analyses, in order to determine the best fit for
the data.
This analysis evaluated each polynomial order equation starting at
zero and then increasing to the third order. This permitted a
mathematical assessment of whether the data best fitted a single linear
relationship (0th-order polynomial; suggesting the fish were
oxyconforming and do not reach a Pcrit), or whether a PO2 crit value
could be determined as an intersection point of two distinct functions
(one at hypoxic oxygen concentrations, the other at normoxic;
i.e. oxyregulation).
Here we are using a Bayesian approach to model fitting with brm.
These models take a long time to run, so I have saved them and re-loaded
them to save time. I have also saved the summary data produced from the
models, to save time, you can simply skip the hashed code and input the
resulting summary data.
We will run our custom function,
bayes_incremental_regression_by_id. This code
takes a while to run. If you have already run this once, or
have downloaded the saved models from GitHub skip this
step (that’s why its hashed out), and run the next line, which
loads the models.
# output_mods_bayes_wd <- paste0(output_mods_wd, './bayes-regs')
# ifelse(!dir.exists(output_mods_bayes_wd), dir.create(output_mods_bayes_wd),
# 'Folder already exists') ids <- slope_tidy %>% dplyr::distinct(id) %>%
# pull(id) plan(multisession) future_map( ids,
# bayes_incremental_regression_by_id, id_name = 'id', data = slope_tidy,
# response = 'MO2_g', predictor = 'DO', save_models = TRUE, mod_output_wd =
# output_mods_bayes_wd ) plan(sequential)
Load all models and store them in a list; this will use a lot of memory. You
can also skip this step and load the resulting data frames below. I am
using the custom function load_rds, so we can compare
them and generate predictions.
# bayes_reg_mods <- load_rds(model_dw = output_mods_bayes_wd)
Get model fit parameters loo and r2 using the custom function,
incremental_regression_bayes_fits.
# setwd(mod_data_wd) bayes_reg_mods_fit <-
# incremental_regression_bayes_fits(models = bayes_reg_mods)
# write.csv(bayes_reg_mods_fit, 'bayes_reg_mods_fit.csv', row.names = FALSE)
Reading in this model fit data frame, in the case you did not load in
all the models.
# Read the saved model-fit summary by explicit path instead of setwd(),
# which changes global state for every later chunk.
bayes_reg_mods_fit <- read.csv(file.path(mod_data_wd, "bayes_reg_mods_fit.csv"))
Selecting the best fitting model
elpd_loo, or the expected log pointwise predictive density for
leave-one-out cross-validation, is a metric used in Bayesian model
evaluation to assess the predictive accuracy of a model.
The elpd_loo is an approximation of how well the model is expected to
predict new data, based on leave-one-out cross-validation. Higher
elpd_loo values indicate better predictive performance.
# Rank the candidate models within each fish by elpd_loo (rank 1 = best).
best_fit_bayes_reg <- bayes_reg_mods_fit %>%
  dplyr::group_by(id) %>%
  # rank(-elpd_loo) runs per fish because of the grouping above
  dplyr::mutate(elpd_loo_rank = rank(-elpd_loo)) %>%
  # FIX: drop the grouping so a grouped tibble is not left dangling for
  # every downstream join/summary
  dplyr::ungroup() %>%
  dplyr::select(id, model_type, elpd_loo, r2, elpd_loo_rank, r2_q2.5, r2_q97.5,
                estimate_DO, conf.low_DO, conf.high_DO)
Pulling our model predictions using a custom function
bayes_mod_predictions.
# setwd(mod_data_wd) bayes_reg_mods_predictions <- bayes_mod_predictions(models
# = bayes_reg_mods, original_data = slope_tidy)
# write.csv(bayes_reg_mods_predictions, 'bayes_reg_mods_predictions.csv',
# row.names = FALSE)
Reading in the predicted data
# Read the saved predictions by explicit path instead of setwd()
bayes_reg_mods_predictions <- read.csv(file.path(mod_data_wd, "bayes_reg_mods_predictions.csv"))
We are going to combine this with our best fitting model df, so we
know how each model ranks for LOO.
# Attach each prediction row to its model's LOO rank and fit summaries
bayes_reg_mods_predictions <- bayes_reg_mods_predictions %>%
  full_join(best_fit_bayes_reg, by = c("id", "model_type"))
The best fitting models were most often a 2nd-order polynomial
(n = 22, 38%) or a 3rd-order polynomial (n = 16, 28%).
This could suggest the presence of a critical oxygen threshold (Pcrit)
where the relationship between O2 and MO2 changes. To confirm there is a
Pcrit, we need to validate the shape of the polynomials and should
use a more specific model to test the Pcrit value. In any case, this
type of model is indicative of an oxyregulator.
The next most common were 0th-order and 1st-order polynomials (both
n = 10, 17%). In the case of the 0th-order model, it suggests
that MO2 does not show a statistically significant dependence on O2.
In other words, the metabolic rate does not adjust based on oxygen
availability, and there is no clear critical oxygen threshold (Pcrit)
where the relationship changes. This is indicative of an
oxyregulator. In the case of the 1st-order polynomials,
it suggests the presence of a linear relationship between O2 and MO2,
which is indicative of an oxyconformer. However, to be
true evidence of an oxyconformer this relationship should be positive
(i.e. as O2 falls MO2 also falls). Only 2 of the 10 individuals best
modelled with a linear function had positive estimates with credible
intervals that did not overlap with zero (Table 1).
# Keep only each fish's best-ranked (LOO rank 1) model
best_mod <- best_fit_bayes_reg %>%
  dplyr::filter(elpd_loo_rank == 1)
total_fish <- nrow(best_mod)
# Count the fish per winning model type, as n and as % of all fish.
# summarise()/n() replace reframe()/length(id) (one row per group).
table_bwm <- best_mod %>%
  dplyr::group_by(model_type) %>%
  dplyr::summarise(n = dplyr::n(), percent = round((n/total_fish) * 100, 2)) %>%
  dplyr::ungroup() %>%
  # Human-readable model names for the table
  dplyr::mutate(best_model_name = case_when(model_type == "lm_0" ~ "0th-order polynomial",
    model_type == "lm_1" ~ "1st-order polynomial", model_type == "lm_2" ~ "2nd-order polynomial",
    model_type == "lm_3" ~ "3rd-order polynomial", TRUE ~ "ERROR")) %>%
  dplyr::select(best_model_name, everything(), -model_type)
table_bwm %>%
  gt() %>%
  cols_align(align = "center", columns = everything())
| best_model_name | n | percent |
|---|---|---|
| 0th-order polynomial | 10 | 17.24 |
| 1st-order polynomial | 10 | 17.24 |
| 2nd-order polynomial | 22 | 37.93 |
| 3rd-order polynomial | 16 | 27.59 |
Summary of the fish whose best-fitting model was a linear function.
# Per-fish summary for the linear (lm_1) winners: r2 and the DO slope with
# credible intervals, plus whether the slope CI supports oxyconforming
# (entirely above zero).
table_lm_1 <- best_mod %>%
  dplyr::filter(model_type == "lm_1") %>%
  dplyr::ungroup() %>%
  dplyr::mutate(
    r_sq_ci = paste0(round(r2, 3), " (", round(r2_q2.5, 3), "–",
                     round(r2_q97.5, 3), ")"),
    est_ci = paste0(round(estimate_DO, 6), " (", round(conf.low_DO, 6), "–",
                    round(conf.high_DO, 6), ")"),
    conformer = if_else(conf.low_DO > 0, "Conforming", "Not conforming")
  ) %>%
  dplyr::arrange(conformer) %>%
  dplyr::select(id, r_sq_ci, est_ci, conformer)
# Render with labelled columns
table_lm_1 %>%
  gt() %>%
  cols_align(align = "center", columns = everything()) %>%
  cols_label(id = "Fish ID", r_sq_ci = "r2 (CI)", est_ci = "Estimate (CI)",
             conformer = "Evidence of oxyconforming")
| Fish ID | r2 (CI) | Estimate (CI) | Evidence of oxyconforming |
|---|---|---|---|
| a_9_22nov_4 | 0.312 (0.094–0.491) | 0.000849 (0.000422–0.001286) | Conforming |
| d_9_25nov_3 | 0.197 (0.012–0.394) | 0.001093 (0.000262–0.001907) | Conforming |
| a_0_24nov_1 | 0.029 (0–0.134) | 0.00017 (-0.000508–0.000862) | Not conforming |
| a_0_24nov_3 | 0.058 (0–0.202) | 0.000468 (-0.000291–0.001217) | Not conforming |
| a_0_25nov_1 | 0.137 (0.002–0.331) | -0.000358 (-0.000702–-1.5e-05) | Not conforming |
| a_9_22nov_1 | 0.084 (0–0.266) | 0.000324 (-0.000168–0.000814) | Not conforming |
| b_0_25nov_2 | 0.108 (0.001–0.29) | 0.000297 (-3e-05–0.000622) | Not conforming |
| c_0_22nov_3 | 0.072 (0–0.256) | 0.000531 (-0.000498–0.001572) | Not conforming |
| c_9_27nov_2 | 0.068 (0–0.255) | 0.000266 (-0.000304–0.000848) | Not conforming |
| d_9_25nov_2 | 0.33 (0.102–0.507) | -0.000891 (-0.001339–-0.000444) | Not conforming |
Now we are plotting each of the regressions. First making a directory
to save the figures
# Directory for the incremental-regression figures.
# NOTE(review): the "." before "/bayes" looks like a typo carried over from
# a relative path ("./bayes"); kept as-is so any existing output still
# matches, but consider file.path(output_fig_wd, "incremental_regressions", "bayes").
incremental_reg_bayes_wd <- file.path(output_fig_wd, "incremental_regressions./bayes")
if (!dir.exists(incremental_reg_bayes_wd)) {
  # FIX: recursive = TRUE also creates the missing parent component; without
  # it dir.create() warns and creates nothing when the parent is absent.
  dir.create(incremental_reg_bayes_wd, recursive = TRUE)
}
Plotting all regressions, and highlighting the model that has the best
fit, based on LOO (elpd_loo) values
# Create a list to store the plots
plots <- list()
model_preds_list <- list()
for (id_i in ids) {
  # Predictions for the current fish: the best (LOO rank 1) model is drawn
  # thick and opaque, all other candidate models thin and faded
  df_i <- bayes_reg_mods_predictions %>%
    dplyr::filter(id == id_i) %>%
    dplyr::mutate(line_size = if_else(elpd_loo_rank == 1, 2, 1),
                  alpha_value = if_else(elpd_loo_rank == 1, 1, 0.4))
  # Annotation anchor: top-left corner of this fish's data.
  # BUG FIX: the original wrote reframe(min = min(DO), na.rm = TRUE), which
  # made `na.rm` a literal column instead of an argument to min()/max(), so
  # a single NA in DO or MO2_g turned x_min/y_max into NA and dropped the
  # annotation.
  x_min <- min(df_i$DO, na.rm = TRUE)
  y_max <- max(df_i$MO2_g, na.rm = TRUE)
  # Fit summary of the best model, used in the annotation text
  best_weighted_model_i <- best_fit_bayes_reg %>%
    dplyr::filter(id == id_i & elpd_loo_rank == 1)
  poly_i_name <- best_weighted_model_i %>%
    dplyr::mutate(name = case_when(
      model_type == "lm_0" ~ "0th-order polynomial",
      model_type == "lm_1" ~ "1st-order polynomial",
      model_type == "lm_2" ~ "2nd-order polynomial",
      model_type == "lm_3" ~ "3rd-order polynomial",
      TRUE ~ "ERROR"
    )) %>%
    dplyr::pull(name)
  r_i <- best_weighted_model_i %>%
    dplyr::mutate(r_sq_ci = paste0(round(r2, 3), " (",
                                   round(r2_q2.5, 3), "–",
                                   round(r2_q97.5, 3), ")")) %>%
    dplyr::pull(r_sq_ci)
  # Create the plot
  p <- ggplot() +
    geom_ribbon(data = df_i,
                aes(x = DO, y = predicted, ymin = pred_lower, ymax = pred_upper, fill = model_type), alpha = 0.1) +
    geom_line(data = df_i,
              aes(x = DO, y = predicted, colour = model_type, size = line_size, alpha = alpha_value)) +
    # Raw data only once, taken from the best model's prediction rows
    geom_point(data = df_i %>% dplyr::filter(elpd_loo_rank == 1), aes(x = DO, y = MO2_g), alpha = 0.6, colour = "black", size = 2) +
    scale_colour_manual(values = c("red", "blue", "green", "purple"),
                        labels = c("0th Order", "1st Order", "2nd Order", "3rd Order")) +
    scale_size_identity() + # Use the size values directly
    scale_alpha_identity(guide = "none") + # Remove the alpha legend
    annotate("text", x = x_min,
             y = y_max,
             label = paste0("Best fit: ", poly_i_name, "\n", "r2 = ", r_i),
             hjust = 0, vjust = 1, size = 4) +
    labs(
      title = paste("Model Fits vs Raw Data for ID", id_i),
      x = "Dissolved oxygen percentage (DO)",
      y = "MO2 (o2 mg/g/h)",
      colour = "Model") +
    theme_classic()
  # Store the plot, then print it into the document
  plots[[id_i]] <- p
  print(p)
}
# To save all plots to individual files
for (id_i in ids) {
  # FIX: file.path() joins the directory and filename correctly; the
  # original paste0(wd, "./plot_...") embedded a stray "." path component.
  ggsave(filename = file.path(incremental_reg_bayes_wd, paste0("plot_", id_i, ".png")),
         plot = plots[[id_i]], width = 8, height = 6)
}
#HERE
# Directory for the global (across-fish) Bayesian regression models.
# FIX: file.path() replaces the "./"-concatenated path, and a plain if()
# replaces the vectorised ifelse(), which was being abused for a scalar
# side effect.
output_mods_bayes_global_wd <- file.path(output_mods_wd, "bayes-regs-global")
if (!dir.exists(output_mods_bayes_global_wd)) {
  dir.create(output_mods_bayes_global_wd, recursive = TRUE)
}
## Need only the best fitting model. Here we are grouping fish by best
## fitting model and getting an average trend.
# best_fit <- bayes_reg_mods_predictions %>% dplyr::filter(elpd_loo_rank == 1)
# ids <- best_fit %>% dplyr::distinct(model_type) %>% pull(model_type)
# plan(multisession) future_map( ids, bayes_incremental_regression_by_id,
# id_name = 'model_type', data = best_fit, response = 'MO2_g', predictor =
# 'DO', save_models = TRUE, mod_output_wd = output_mods_bayes_global_wd )
# plan(sequential)
# Reload the previously fitted global models from disk.
bayes_reg_mods <- load_rds(model_dw = output_mods_bayes_global_wd)
# ggplot() + geom_ribbon(data = df_i, aes(x = DO, y = predicted, ymin =
# pred_lower, ymax = pred_upper, fill = model_type), alpha = 0.1) +
# geom_line(data = df_i, aes(x = DO, y = predicted, colour = model_type, size =
# line_size, alpha = alpha_value)) + geom_point(data =
# bayes_reg_mods_predictions %>% dplyr::filter(elpd_loo_rank == 1), aes(x = DO,
# y = MO2_g), alpha = 0.1, colour = 'black', size = 1) + geom_line(data =
# bayes_reg_mods_predictions %>% dplyr::filter(elpd_loo_rank == 1), aes(x = DO,
# y = predicted, by = id), alpha = 0.2) + scale_colour_manual(values = c('red',
# 'blue', 'green', 'purple'), labels = c('0th Order', '1st Order', '2nd Order',
# '3rd Order')) + annotate('text', x = x_min, y = y_max, label = paste0('Best
# fit: ',poly_i_name, '\n', 'r2 = ', r_i), hjust = 0, vjust = 1, size = 4) +
# facet_wrap(~model_type) + labs( title = paste('Model Fits vs Raw Data for
# ID', id_i), x = 'Dissolved oxygen percentage (DO)', y = 'MO2 (mg o2/g/h)',
# colour = 'Model') + theme_classic()
# global_models <- list( lm_0 = lmer(MO2_g ~ 1 + (1|id), data = slope_tidy %>%
# dplyr::filter(poly == 0), weights = weight_smr), lm_1 = lmer(MO2_g ~ DO +
# (1|id), data = slope_tidy %>% dplyr::filter(poly == 1), weights =
# weight_smr), lm_2 = lmer(MO2_g ~ poly(DO, 2) + (1|id), data = slope_tidy %>%
# dplyr::filter(poly == 2), weights = weight_smr), lm_3 = lmer(MO2_g ~ poly(DO,
# 3) + (1|id), data = slope_tidy %>% dplyr::filter(poly == 3), weights =
# weight_smr) ) global_predictions <- data.frame(DO = seq(min(slope_tidy$DO),
# max(slope_tidy$DO), length.out = 100)) for (model_name in
# names(global_models)) { predictions <- predict( global_models[[model_name]],
# newdata = global_predictions, re.form = NA, # Excludes random effects
# (population-level predictions) se.fit = TRUE # Returns standard errors )
# global_predictions[[paste0(model_name, '_fit')]] <- predictions$fit
# global_predictions[[paste0(model_name, '_lwr')]] <- predictions$fit - 1.96 *
# predictions$se.fit global_predictions[[paste0(model_name, '_upr')]] <-
# predictions$fit + 1.96 * predictions$se.fit } global_predictions_long <-
# global_predictions %>% pivot_longer( cols =
# matches('lm_.*_fit|lm_.*_lwr|lm_.*_upr'), names_to = c('model', '.value'),
# names_pattern = '(lm_\\d+)_(.*)' ) %>% dplyr::mutate(best_model_name =
# case_when( model == 'lm_0' ~ '0th-order polynomial', model == 'lm_1' ~
# '1st-order polynomial', model == 'lm_2' ~ '2nd-order polynomial', model ==
# 'lm_3' ~ '3rd-order polynomial', TRUE ~ 'ERROR' ))
Figure
# best_weighted_model_pred <- best_weighted_model %>% dplyr::left_join(.,
# model_preds_df, by = c('id', 'model')) %>% dplyr::ungroup() %>%
# dplyr::mutate(best_model_name = case_when( poly == 0 ~ '0th-order
# polynomial', poly == 1 ~ '1st-order polynomial', poly == 2 ~ '2nd-order
# polynomial', poly == 3 ~ '3rd-order polynomial', TRUE ~ 'ERROR' ))
# annotation_data <- table_bwm %>% dplyr::select(best_model_name, n) fig_1 <-
# ggplot() + geom_line(data = best_weighted_model_pred, aes(x = DO, y =
# MO2_pred, color = id), size = 1, alpha = 1) + geom_point(data = slope_tidy,
# aes(x = DO, y = MO2_g), alpha = 0.1, colour = 'black', size = 2) +
# geom_ribbon(data = global_predictions_long, aes(x = DO, ymin = lwr, ymax =
# upr, group = model), fill = '#FC6C85', alpha = 0.2) + # Shaded confidence
# intervals geom_line(data = global_predictions_long, aes(x = DO, y = fit),
# size = 1.5, color = '#FF007F') + facet_wrap(~best_model_name) +
# scale_color_grey(start = 0.1, end = 0.9) + labs( title = paste('Model
# estimates and observed data grouped by best fitting model'), x = 'Dissolved
# oxygen percentage (DO)', y = 'MO2 (O2 mg/g/h)') + theme_classic() +
# theme(legend.position = 'none') + geom_text(data = annotation_data, aes(x =
# -Inf, y = Inf, label = paste0('italic(n) == ', n)), hjust = -0.1, vjust =
# 1.2, inherit.aes = FALSE, parse = TRUE) fig_1
For those fish that were best modelled with a 2nd or 3rd-order
polynomial (n = 46) we will check to see if a Pcrit is present.
We are filtering the data for only those fish.
# Fish whose MO2~DO relationship was best described by a curvilinear
# (2nd- or 3rd-order) polynomial are the candidates for a Pcrit (n = 46).
# FIX(review): the original filtered model_type == "lm_1" (linear), which
# contradicts the surrounding text ("2nd or 3rd-order polynomial, n = 46")
# and the downstream counts; lm_2/lm_3 is the described set — confirm
# against the Bayesian model ranking.
check_pcrit_ids <- bayes_reg_mods_predictions %>%
  dplyr::filter(model_type %in% c("lm_2", "lm_3")) %>%
  dplyr::distinct(id) %>%
  dplyr::pull(id)
# Raw data restricted to those candidate fish.
check_pcrit_df <- slope_tidy %>%
  dplyr::filter(id %in% check_pcrit_ids)
We will calculate Pcrit using Chabot method and function calcO2crit.
We are using our estimates for SMR (mean of lowest three).
This function uses the fifth percentile of the MO2 values observed at
dissolved oxygen levels ≥ 80% air saturation as the criterion to assess
low MO2 values. The algorithm then identifies all the MO2 measurements
greater than this minimally acceptable MO2 value. Within this sub-set,
it identifies the MO2 measurement made at the lowest DO and thereafter
considers this DO as candidate for breakpoint (named pivotDO in the
script). A regression is then calculated using observations at DO levels
< pivotDO, and a first estimate of O2crit is calculated as the
intersection of this regression line with the horizontal line
representing SMR. The script then goes through validation steps to
ensure that the slope of the regression is not so low that the line,
projected to normoxic DO levels, passes below any MO2 values observed in
normoxia. It also ensures that the intercept is not greater than zero.
Corrective measures are taken if such problems are encountered.
The lowestMO2 default is `quantile(Data$MO2[Data$DO >= 80], p = 0.05)`. It is used
to segment the data and locate the pivotDO.
# Per-fish Pcrit (O2crit) estimation via Chabot's method, seeded with our
# SMR estimate (mean of the lowest three MO2 values).
ids <- check_pcrit_df %>%
  dplyr::distinct(id) %>%
  dplyr::pull()
pcrit_model_df_list <- list()
pcrit_models <- list()
for (id_i in ids) {
  df_i <- check_pcrit_df %>%
    dplyr::filter(id == id_i)
  # Breakpoint of the oxyconforming regression with the horizontal SMR line.
  o2crit <- calcO2crit(Data = df_i, SMR = df_i$SMR[1], lowestMO2 = NA, gapLimit = 4,
                       max.nb.MO2.for.reg = 7)
  # 5th percentile of MO2 at DO >= 80% air saturation — the default criterion
  # calcO2crit uses to screen low MO2 values; recomputed here for reporting.
  # (Also fixes the `=` assignment to the conventional `<-`.)
  lowestMO2 <- quantile(df_i$MO2[df_i$DO >= 80], p = 0.05)
  # One summary row per fish. The intermediate `vaule` variable and the
  # loop-shadowing of `pcrit_model_df` (overwritten each iteration and then
  # reused as the combined frame's name below) are removed; the misspelled
  # column name `pcrit_vaule` is kept because downstream code references it.
  pcrit_model_df_list[[id_i]] <- tibble(
    id = id_i,
    pcrit_vaule = o2crit$o2crit,
    pcrit_smr = o2crit$SMR,
    pcrit_lowestMO2 = lowestMO2,
    pcrit_nb_mo2_conforming = o2crit$Nb_MO2_conforming,
    pcrit_r2 = o2crit$r2,
    pcrit_method = o2crit$Method,
    pcrit_p = o2crit$P[1]
  )
  pcrit_models[[id_i]] <- o2crit
}
# Combine the per-fish summaries into a single data frame.
pcrit_model_df <- bind_rows(pcrit_model_df_list)
Here’s the plots for the Pcrit estimates
# Ensure the output directory for the Chabot figures exists.
output_fig_pcrit_chabot_wd <- file.path(output_fig_wd, "model_chabot")
if (!dir.exists(output_fig_pcrit_chabot_wd)) dir.create(output_fig_pcrit_chabot_wd)
# Candidate fish ids.
ids <- unique(check_pcrit_df$id)
pcrit_chabot_list <- list()
# All Chabot plots are written to one multi-page PDF.
pdf(file = file.path(output_fig_pcrit_chabot_wd, "combined_chabot_plots.pdf"),
    width = 8, height = 6)
# One annotated Chabot plot per fish, written to the open PDF device.
for (id_i in ids) {
  # Single diagnostics row for this fish — replaces five separate
  # filter/mutate/pull pipelines over the same data frame.
  stats_i <- pcrit_model_df %>%
    dplyr::filter(id == id_i)
  r2 <- round(stats_i$pcrit_r2, 3)
  conforming <- round(stats_i$pcrit_nb_mo2_conforming, 3)
  P <- round(stats_i$pcrit_p, 3)
  SMR <- round(stats_i$pcrit_smr, 3)
  lowestMO2 <- round(stats_i$pcrit_lowestMO2, 3)
  # Generate and render the plot
  plotO2crit(o2critobj = pcrit_models[[id_i]])
  # Title: fish id
  mtext(text = paste0(id_i), side = 3, line = 2, adj = 0, col = "blue", font = 2,
        cex = 1.2)
  # Sub-title: model diagnostics
  mtext(text = paste0("R2 = ", r2, "; p = ", P, "; CP < SMR = ", conforming, "; SMR = ",
        SMR, "; lowestMO2 = ", lowestMO2), side = 3, line = 1, adj = 0, col = "blue",
        font = 1, cex = 0.8)
}
# Close the PDF device *after* the loop
dev.off()
## png
## 2
Printing in HTML document
# Render each Chabot plot into the HTML document, adding any recorded
# comment for the fish to the title.
ids <- check_pcrit_df %>%
  dplyr::distinct(id) %>%
  dplyr::pull()
for (id_i in ids) {
  # First recorded comment for this fish ("" when none was recorded).
  comment <- check_pcrit_df %>%
    dplyr::filter(id == id_i) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(comment = if_else(is.na(comments), "", paste0("#", comments))) %>%
    pull(comment)
  # Single diagnostics row for this fish — replaces five separate
  # filter/mutate/pull pipelines over the same data frame.
  stats_i <- pcrit_model_df %>%
    dplyr::filter(id == id_i)
  r2 <- round(stats_i$pcrit_r2, 3)
  conforming <- round(stats_i$pcrit_nb_mo2_conforming, 3)
  P <- round(stats_i$pcrit_p, 3)
  SMR <- round(stats_i$pcrit_smr, 3)
  lowestMO2 <- round(stats_i$pcrit_lowestMO2, 3)
  # Generate and render the plot
  plotO2crit(o2critobj = pcrit_models[[id_i]])
  # Title: fish id plus comment
  mtext(text = paste0(id_i, " ", comment), side = 3, line = 2, adj = 0, col = "blue",
        font = 2, cex = 1.2)
  # Sub-title: model diagnostics
  mtext(text = paste0("R2 = ", r2, "; p = ", P, "; CP < SMR = ", conforming, "; SMR = ",
        SMR, "; lowestMO2 = ", lowestMO2), side = 3, line = 1, adj = 0, col = "blue",
        font = 1, cex = 0.8)
}
We need to set some rules as to when the Pcrit estimates are
reliable, as it seems many of our fish do not reach a Pcrit.
We can filter for only cases where, at the lowest O2 value, three consecutive MO2 measures fall below both our SMR and the fifth percentile of the MO2 values observed at dissolved O2 levels > 80%. In the model output these are called nb_mo2_conforming points. We can then visually inspect these to see if a Pcrit is present.
# Candidate fish: more than two conforming MO2 points, i.e. at least three
# consecutive measures below SMR at the lowest O2 levels.
pcrit_list <- pcrit_model_df$id[which(pcrit_model_df$pcrit_nb_mo2_conforming > 2)]
paste0("Based on this rule there are ", length(pcrit_list), " fish with possible Pcrits.")
## [1] "Based on this rule there are 14 fish with possible Pcrits."
Here are the plots of these 14 fish for visual confirmation
# Plots of the rule-passing candidates for visual confirmation of a Pcrit.
for (id_i in pcrit_list) {
  # First recorded comment for this fish ("" when none was recorded).
  comment <- check_pcrit_df %>%
    dplyr::filter(id == id_i) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(comment = if_else(is.na(comments), "", paste0("#", comments))) %>%
    pull(comment)
  # Single diagnostics row for this fish — replaces five separate
  # filter/mutate/pull pipelines over the same data frame.
  stats_i <- pcrit_model_df %>%
    dplyr::filter(id == id_i)
  r2 <- round(stats_i$pcrit_r2, 3)
  conforming <- round(stats_i$pcrit_nb_mo2_conforming, 3)
  P <- round(stats_i$pcrit_p, 3)
  SMR <- round(stats_i$pcrit_smr, 3)
  lowestMO2 <- round(stats_i$pcrit_lowestMO2, 3)
  # Generate and render the plot
  plotO2crit(o2critobj = pcrit_models[[id_i]])
  # Title: fish id plus comment
  mtext(text = paste0(id_i, " ", comment), side = 3, line = 2, adj = 0, col = "blue",
        font = 2, cex = 1.2)
  # Sub-title: model diagnostics
  mtext(text = paste0("R2 = ", r2, "; p = ", P, "; CP < SMR = ", conforming, "; SMR = ",
        SMR, "; lowestMO2 = ", lowestMO2), side = 3, line = 1, adj = 0, col = "blue",
        font = 1, cex = 0.8)
}
Based on visual checks the following fish do have clear Pcrit values
# Fish judged (by visual inspection) to show a clear Pcrit.
do_have_pcrit <- c("a_9_21nov_3", "b_0_24nov_1", "b_0_24nov_2", "b_0_25nov_1", "b_0_25nov_3",
"b_0_26_nov_1", "b_9_21_nov_1", "b_9_21nov_2", "b_9_21nov_3", "d_0_21nov_3")
n_pcrit <- length(do_have_pcrit)
# (`have_pcirt` name kept as-is — "pcirt" typo — to avoid breaking callers.)
have_pcirt <- pcrit_model_df %>%
  dplyr::filter(id %in% do_have_pcrit)
# One summarise() call replaces the original three separate
# reframe()/pull() pipelines over the same column.
pcrit_summary <- have_pcirt %>%
  dplyr::summarise(mean = mean(pcrit_vaule),
                   min = min(pcrit_vaule),
                   max = max(pcrit_vaule))
mean_pcrit <- round(pcrit_summary$mean, 2)
min_pcrit <- round(pcrit_summary$min, 2)
max_pcrit <- round(pcrit_summary$max, 2)
print(paste0("There are ", n_pcrit, " fish with identified Pcrits and the mean Pcrit is ",
mean_pcrit, " (range: ", min_pcrit, "–", max_pcrit, ")"))
## [1] "There are 10 fish with identified Pcrits and the mean Pcrit is 24.84 (range: 17.1–32.8)"
We will use weighted regression to account for a higher density of
data at normoxic conditions (i.e. SMR values). I have used two different
weighting approaches, (1) weighting the importance of each data point
based on the frequency of points in a given o2 space (12 evenly spaced
bins), or (2) weighting only the SMR slopes, as they are the only values
that have repeated measures. Points that have higher weights influence
the model fit more, while points with lower weights have less impact. A
high density of points at high o2 values could lead to overfitting in
that region, while underfitting or misrepresenting trends in
lower-density regions (e.g., low o2 values).
Here we are making the two weightings. The first is achieved by
splitting the o2 data into 12 evenly spaced bins and summing the number of
data points in each bin; the weight is then given to data points within
that bin based on the inverse frequency of data points. The second
method is simply applied to the SMR measurements, and is the inverse
frequency of SMR measurements.
# Two weighting schemes per fish:
#  (1) weight_smr — SMR measurements are down-weighted by the inverse of the
#      number of SMR measures; all other points keep weight 1;
#  (2) weight_bins — inverse of the number of points in each of 12
#      equal-width DO bins, down-weighting the dense normoxic region.
slope_tidy <- slope_tidy %>%
  dplyr::group_by(id) %>%
  dplyr::mutate(o2_bin = cut(DO, breaks = 12),
                weight_smr = if_else(phase == "smr", 1/sum(phase == "smr"), 1)) %>%
  ungroup() %>%
  dplyr::group_by(id, o2_bin) %>%
  dplyr::mutate(
    # FIX: n() counts rows in the group directly; the original
    # length(order) would silently measure the base function `order`
    # (length 1) if no `order` column existed in the data.
    bin_freq = dplyr::n(),
    weight_bins = 1 / bin_freq # Weight = inverse frequency
  ) %>%
  ungroup()
Here we are building the weighted regressions.
# Fit the four weighted candidate polynomials (orders 0-3) for every fish
# and collect whole-model and coefficient-level summaries.
ids <- as.list(unique(slope_tidy$id))
weighted_model_comparison_list <- list()
weighted_model_results_list <- list()
for (id_i in ids) {
  # Data for the current fish.
  df_i <- dplyr::filter(slope_tidy, id == id_i)
  # Candidate models, all weighted by the SMR weighting scheme.
  models <- list(
    lm_0 = lm(MO2_g ~ 1, data = df_i, weights = weight_smr),
    lm_1 = lm(MO2_g ~ DO, data = df_i, weights = weight_smr),
    lm_2 = lm(MO2_g ~ poly(DO, 2), data = df_i, weights = weight_smr),
    lm_3 = lm(MO2_g ~ poly(DO, 3), data = df_i, weights = weight_smr)
  )
  # Whole-model fit statistics, one row per candidate model.
  comparison_i <- purrr::map_df(models, glance, .id = "model")
  weighted_model_comparison_list[[id_i]] <- comparison_i %>%
    dplyr::mutate(id = id_i) %>%
    dplyr::select(id, everything())
  # Coefficient estimates with confidence intervals.
  results_i <- purrr::map_df(models, ~ tidy(.x, conf.int = TRUE), .id = "model")
  weighted_model_results_list[[id_i]] <- results_i %>%
    clean_names() %>%
    dplyr::mutate(id = id_i) %>%
    dplyr::select(id, everything())
}
# Stack the per-fish results and record each model's polynomial order as a
# number parsed from its "lm_<k>" label.
weighted_model_comparison <- weighted_model_comparison_list %>%
  bind_rows() %>%
  dplyr::mutate(poly = as.numeric(stringr::str_remove_all(model, "lm_")))
weighted_model_results <- weighted_model_results_list %>%
  bind_rows() %>%
  dplyr::mutate(poly = as.numeric(stringr::str_remove_all(model, "lm_")))
Selecting the best fitting model
# Per fish, pick the model with the best combined rank: lower AIC/BIC and
# higher logLik all increase the score, so the highest score wins.
best_weighted_model <- weighted_model_comparison %>%
  dplyr::group_by(id) %>%
  dplyr::mutate(
    AIC_rank = rank(-AIC),       # lowest AIC gets the highest rank
    BIC_rank = rank(-BIC),       # lowest BIC gets the highest rank
    logLik_rank = rank(logLik),  # highest logLik gets the highest rank
    score = AIC_rank + BIC_rank + logLik_rank # Combined score
  ) %>%
  # slice_max() selects the per-group maximum explicitly; the original
  # relied on a *global* arrange(desc(score)) followed by slice(1), which
  # only worked because a global sort preserves within-group order.
  dplyr::slice_max(score, n = 1, with_ties = FALSE)
Now we are plotting each of the regressions. First making a directory
to save the figures
# Nested output directory for the frequentist incremental-regression figures.
# FIX: the original literal "incremental_regressions./freq" embedded a
# trailing-dot component that only resolves on Windows; file.path() builds
# the nested path cleanly and recursive = TRUE creates the parent directory
# when needed.
incremental_reg_freq_wd <- file.path(output_fig_wd, "incremental_regressions", "freq")
if (!dir.exists(incremental_reg_freq_wd)) {
  dir.create(incremental_reg_freq_wd, recursive = TRUE)
}
Plotting all regressions, and highlighting the model that has the best
fit, based on AIC values
# Plot every candidate regression for each fish, highlighting the best fit
# (thicker, opaque line) chosen by the AIC/BIC/logLik ranking.
plots <- list()
model_preds_list <- list()
for (id_i in ids) {
  # Data for the current fish.
  df_i <- slope_tidy %>%
    filter(id == id_i)
  # Annotation anchors (top-left corner of the plot).
  # FIX: the original passed `na.rm = TRUE` to reframe() — creating a
  # literal `na.rm` column — instead of to min()/max().
  x_min <- min(df_i$DO, na.rm = TRUE)
  y_max <- max(df_i$MO2_g, na.rm = TRUE)
  # Prediction grid spanning the observed DO range.
  df_predictions <- data.frame(DO = seq(min(df_i$DO), max(df_i$DO), length.out = 100))
  df_predictions$id <- id_i
  # Re-fit the four weighted candidate models for this fish.
  models <- list(
    lm_0 = lm(MO2_g ~ 1, data = df_i, weights = df_i$weight_smr),
    lm_1 = lm(MO2_g ~ DO, data = df_i, weights = df_i$weight_smr),
    lm_2 = lm(MO2_g ~ poly(DO, 2), data = df_i, weights = df_i$weight_smr),
    lm_3 = lm(MO2_g ~ poly(DO, 3), data = df_i, weights = df_i$weight_smr)
  )
  # Best-model metadata for the annotation.
  best_weighted_model_i <- best_weighted_model %>%
    dplyr::filter(id == id_i)
  poly_i_name <- best_weighted_model_i %>%
    dplyr::mutate(name = case_when(
      poly == 0 ~ "0th-order polynomial",
      poly == 1 ~ "1st-order polynomial",
      poly == 2 ~ "2nd-order polynomial",
      poly == 3 ~ "3rd-order polynomial",
      TRUE ~ "ERROR"
    )) %>%
    dplyr::pull(name)
  r_i <- best_weighted_model_i %>%
    dplyr::pull(r.squared) %>%
    round(., 3)
  p_value_i <- best_weighted_model_i %>%
    dplyr::pull(p.value) %>%
    round(., 3)
  # Report tiny p-values as "< 0.001". NOTE(review): glance() returns no
  # p-value for the intercept-only lm_0 fit, so this can print "P NA" —
  # confirm whether lm_0 winners need special-casing.
  p_value_i <- if_else(p_value_i < 0.001, "< 0.001", paste0("= ", p_value_i))
  model_i <- best_weighted_model_i %>%
    dplyr::pull(model)
  # Predictions for each candidate model on the common grid.
  for (model_name in names(models)) {
    df_predictions[[model_name]] <- predict(models[[model_name]], newdata = df_predictions)
  }
  # Long format for ggplot; the winning model is drawn thicker and opaque.
  df_predictions_long <- df_predictions %>%
    pivot_longer(cols = starts_with("lm_"), names_to = "model", values_to = "MO2_pred") %>%
    mutate(line_size = if_else(model == model_i, 2, 1),
           alpha_value = if_else(model == model_i, 1, 0.4))
  # Create the plot
  p <- ggplot() +
    geom_point(data = df_i, aes(x = DO, y = MO2_g), alpha = 0.6, colour = "black", size = 2) +
    geom_line(data = df_predictions_long,
              aes(x = DO, y = MO2_pred, colour = model, size = line_size, alpha = alpha_value)) +
    scale_colour_manual(values = c("red", "blue", "green", "purple"),
                        labels = c("0th Order", "1st Order", "2nd Order", "3rd Order")) +
    scale_size_identity() + # Use the size values directly
    scale_alpha_identity(guide = "none") + # Remove the alpha legend
    annotate("text", x = x_min,
             y = y_max,
             label = paste0("Best fit: ",poly_i_name, "\n", "R = ", r_i, "; P ", p_value_i),
             hjust = 0, vjust = 1, size = 4) +
    labs(
      title = paste("Model Fits vs Raw Data for ID", id_i),
      x = "Dissolved oxygen percentage (DO)",
      y = "MO2 (o2 mg/g/h)",
      colour = "Model") +
    theme_classic()
  # Store the plot and predictions for reuse in the faceted global figure.
  plots[[id_i]] <- p
  model_preds_list[[id_i]] <- df_predictions_long
  print(p)
}
# Save each per-fish figure to its own PNG.
# FIX: file.path() replaces the non-portable paste0(wd, "./plot_...")
# construction (trailing-dot path component only resolves on Windows).
for (id_i in ids) {
  ggsave(filename = file.path(incremental_reg_freq_wd, paste0("plot_", id_i, ".png")),
         plot = plots[[id_i]], width = 8, height = 6)
}
# All per-model predictions in one frame for the global figure below.
model_preds_df <- bind_rows(model_preds_list)
The best fitting models were most often a 3rd-order polynomial
(n = 27, 46.55%) or a 2nd-order polynomial (n = 19,
32.76%). This could suggest the presence of a critical oxygen threshold
(Pcrit) where the relationship between o2 and MO2 changes. To confirm
their is a Pcrit, we need to validated the shape of the polynomials and
in should use a more specific model to test the Pcrit value. This type
of model is indicative of oxyregulator.
The next most common are 0th-order and 1st-order polynomials (both
n = 6, 10.34%). In the case of the 0th-order model, it suggests
that MO2 does not show a statistically significant dependence on the o2.
In other words, the metabolic rate does not adjust based on oxygen
availability, and there is no clear critical oxygen threshold (Pcrit)
where the relationship changes. This is indicative of an
oxyregulator. In the case of the 1st-order polynomials,
it suggests the presence of a linear relationship between o2 and MO2,
which is indicative of an oxyconformer. However, to be
true evidence of an oxyconformer this relationship should be positive
(i.e. as o2 falls MO2 also falls). Only 3 of the 6 individuals best
modelled with a linear function were positive, and only 2 were
statistically significant (Table 1).
# Frequency table of the best-fitting model classes across fish.
total_fish <- nrow(best_weighted_model)
table_bwm <- best_weighted_model %>%
  dplyr::group_by(poly) %>%
  dplyr::reframe(n = dplyr::n(), percent = round((n/total_fish) * 100, 2)) %>%
  dplyr::mutate(best_model_name = case_when(
    poly == 0 ~ "0th-order polynomial",
    poly == 1 ~ "1st-order polynomial",
    poly == 2 ~ "2nd-order polynomial",
    poly == 3 ~ "3rd-order polynomial",
    TRUE ~ "ERROR"
  )) %>%
  dplyr::select(best_model_name, everything(), -poly)
# Render as a centred gt table.
table_bwm %>%
  gt() %>%
  cols_align(align = "center", columns = everything())
| best_model_name | n | percent |
|---|---|---|
| 0th-order polynomial | 6 | 10.34 |
| 1st-order polynomial | 6 | 10.34 |
| 2nd-order polynomial | 19 | 32.76 |
| 3rd-order polynomial | 27 | 46.55 |
Adding model information to the full data frame
# Attach each fish's best-model metadata to the raw data, label the model
# class, and give the frequentist fit columns explicit names.
slope_tidy <- slope_tidy %>%
  full_join(best_weighted_model, by = "id") %>%
  dplyr::mutate(best_model_name = case_when(
    poly == 0 ~ "0th-order polynomial",
    poly == 1 ~ "1st-order polynomial",
    poly == 2 ~ "2nd-order polynomial",
    poly == 3 ~ "3rd-order polynomial",
    TRUE ~ "ERROR"
  )) %>%
  dplyr::rename(model_freq = model, r_squared_freq = r.squared,
                adj_r_squared_freq = adj.r.squared, sigma_freq = sigma)
Here we are grouping fish by best fitting model and getting an average trend
# Mixed-effects "global" models: one per best-model class, pooling the fish
# assigned to that class with a random intercept per fish and the SMR
# weighting scheme used throughout.
global_models <- list(
lm_0 = lmer(MO2_g ~ 1 + (1|id), data = slope_tidy %>%
dplyr::filter(poly == 0), weights = weight_smr),
lm_1 = lmer(MO2_g ~ DO + (1|id), data = slope_tidy %>%
dplyr::filter(poly == 1), weights = weight_smr),
lm_2 = lmer(MO2_g ~ poly(DO, 2) + (1|id), data = slope_tidy %>%
dplyr::filter(poly == 2), weights = weight_smr),
lm_3 = lmer(MO2_g ~ poly(DO, 3) + (1|id), data = slope_tidy %>%
dplyr::filter(poly == 3), weights = weight_smr)
)
# Common DO grid spanning all fish for population-level predictions.
global_predictions <- data.frame(DO = seq(min(slope_tidy$DO), max(slope_tidy$DO), length.out = 100))
for (model_name in names(global_models)) {
# NOTE(review): predict.merMod only honours se.fit in recent lme4 releases —
# confirm the installed version returns a list with $fit and $se.fit here.
predictions <- predict(
global_models[[model_name]],
newdata = global_predictions,
re.form = NA, # Excludes random effects (population-level predictions)
se.fit = TRUE # Returns standard errors
)
# Point predictions plus an approximate 95% Wald interval (fit ± 1.96 SE).
global_predictions[[paste0(model_name, "_fit")]] <- predictions$fit
global_predictions[[paste0(model_name, "_lwr")]] <- predictions$fit - 1.96 * predictions$se.fit
global_predictions[[paste0(model_name, "_upr")]] <- predictions$fit + 1.96 * predictions$se.fit
}
# Wide -> long: one row per (DO, model) with fit/lwr/upr columns, plus a
# readable model-class label.
model_labels <- c(lm_0 = "0th-order polynomial",
                  lm_1 = "1st-order polynomial",
                  lm_2 = "2nd-order polynomial",
                  lm_3 = "3rd-order polynomial")
global_predictions_long <- global_predictions %>%
  pivot_longer(
    cols = matches("lm_.*_fit|lm_.*_lwr|lm_.*_upr"),
    names_to = c("model", ".value"),
    names_pattern = "(lm_\\d+)_(.*)"
  ) %>%
  # Named-vector lookup; unmatched labels (none expected) become "ERROR",
  # matching the original case_when() fallback.
  dplyr::mutate(best_model_name = dplyr::coalesce(unname(model_labels[model]), "ERROR"))
Figure
# Join the per-fish prediction curves onto the best-model table and label
# each fish's model class.
best_weighted_model_pred <- best_weighted_model %>%
  dplyr::ungroup() %>%
  dplyr::left_join(model_preds_df, by = c("id", "model")) %>%
  dplyr::mutate(best_model_name = case_when(
    poly == 0 ~ "0th-order polynomial",
    poly == 1 ~ "1st-order polynomial",
    poly == 2 ~ "2nd-order polynomial",
    poly == 3 ~ "3rd-order polynomial",
    TRUE ~ "ERROR"
  ))
# Per-facet sample sizes for the "n = ..." annotations.
annotation_data <- table_bwm[, c("best_model_name", "n")]
# Figure 1: per-fish best-fit curves (grey), raw data (black points), and the
# population-level trend with 95% interval (pink), faceted by model class.
fig_1 <- ggplot() +
# Individual fish curves, one grey line per fish.
geom_line(data = best_weighted_model_pred,
aes(x = DO, y = MO2_pred, color = id), size = 1, alpha = 1) +
geom_point(data = slope_tidy, aes(x = DO, y = MO2_g), alpha = 0.1, colour = "black", size = 2) +
geom_ribbon(data = global_predictions_long,
aes(x = DO, ymin = lwr, ymax = upr, group = model),
fill = "#FC6C85", alpha = 0.2) + # Shaded confidence intervals
# Population-level (fixed-effects) trend from the mixed models.
geom_line(data = global_predictions_long,
aes(x = DO, y = fit), size = 1.5, color = "#FF007F") +
facet_wrap(~best_model_name) +
scale_color_grey(start = 0.1, end = 0.9) +
labs(
title = paste("Model estimates and observed data grouped by best fitting model"),
x = "Dissolved oxygen percentage (DO)",
y = "MO2 (O2 mg/g/h)") +
theme_classic() +
theme(legend.position = "none") +
# Sample size per facet, parsed as plotmath ("italic(n) == ...").
geom_text(data = annotation_data,
aes(x = -Inf, y = Inf, label = paste0("italic(n) == ", n)),
hjust = -0.1, vjust = 1.2, inherit.aes = FALSE, parse = TRUE)
fig_1
For the 6 fish that had o2 and MO2 relationships best modelled
with a linear function, only 3 showed positive relationships (which we
would expect if fish were oxyconforming), and only 2 of those were
statistically significant (see table below).
# Slope (DO term) table for the fish best fit by the linear lm_1 model.
ids_lm_1_list <- best_weighted_model %>%
  dplyr::filter(model == "lm_1") %>%
  dplyr::pull(id)
# DO coefficients for those fish only.
lm1_slopes <- weighted_model_results %>%
  dplyr::filter(id %in% ids_lm_1_list, model == "lm_1", term == "DO")
lm1_slopes %>%
  dplyr::mutate(estimate = round(estimate, 5),
                ci = paste0(round(conf_low, 5), " – ", round(conf_high, 5)),
                p_value = round(p_value, 3)) %>%
  dplyr::select(id, estimate, ci, p_value) %>%
  gt() %>%
  cols_align(align = "center", columns = everything())
| id | estimate | ci | p_value |
|---|---|---|---|
| a_9_22nov_1 | 0.00061 | 0.00012 – 0.0011 | 0.016 |
| b_0_25nov_2 | 0.00021 | -5e-05 – 0.00046 | 0.104 |
| b_0_26nov_3 | 0.00023 | 3e-05 – 0.00042 | 0.025 |
| b_0_27nov_2 | -0.00016 | -0.00037 – 4e-05 | 0.111 |
| c_9_26nov_3 | -0.00095 | -0.00139 – -0.00051 | 0.000 |
| d_9_25nov_2 | -0.00132 | -0.00215 – -0.00049 | 0.003 |
Here we build the same models as above but using the SMR estimated
with the Chabot methods.
# Re-run Chabot's O2crit estimation, this time seeding it with the SMR
# estimated by the Chabot method itself (SMR_CHABOT).
ids <- check_pcrit_df %>%
  dplyr::distinct(id) %>%
  dplyr::pull()
pcrit_model_df_list_2 <- list()
pcrit_models_2 <- list()
for (id_i in ids) {
  df_i <- check_pcrit_df %>%
    dplyr::filter(id == id_i)
  o2crit <- calcO2crit(Data = df_i, SMR = df_i$SMR_CHABOT[1], lowestMO2 = NA, gapLimit = 4,
                       max.nb.MO2.for.reg = 7)
  # 5th percentile of MO2 at DO >= 80% air saturation, recomputed for
  # reporting. (Also fixes the `=` assignment to the conventional `<-`.)
  lowestMO2 <- quantile(df_i$MO2[df_i$DO >= 80], p = 0.05)
  # One summary row per fish; the `vaule` temp and loop-shadowed
  # `pcrit_model_df` are removed. Column names are kept exactly as before —
  # including the misspelled `pcrit_vaule` and the capitalised `pcrit_SMR`
  # (which differs from the first run's `pcrit_smr`) — because downstream
  # code references them.
  pcrit_model_df_list_2[[id_i]] <- tibble(
    id = id_i,
    pcrit_vaule = o2crit$o2crit,
    pcrit_SMR = o2crit$SMR,
    pcrit_lowestMO2 = lowestMO2,
    pcrit_nb_mo2_conforming = o2crit$Nb_MO2_conforming,
    pcrit_r2 = o2crit$r2,
    pcrit_method = o2crit$Method,
    pcrit_p = o2crit$P[1]
  )
  pcrit_models_2[[id_i]] <- o2crit
}
# Combine the per-fish summaries into a single data frame.
pcrit_model_df_2 <- bind_rows(pcrit_model_df_list_2)
Now filtering out based on the same rules above
# Apply the same >2 conforming-points rule to the Chabot-SMR run.
pcrit_list_2 <- pcrit_model_df_2$id[which(pcrit_model_df_2$pcrit_nb_mo2_conforming > 2)]
paste0("Based on this rule there are ", length(pcrit_list_2), " fish with possible Pcrits.")
## [1] "Based on this rule there are 14 fish with possible Pcrits."
Plotting with the SMR Chabot method
# Plot the Chabot-SMR run.
# FIX(review): the original iterated over `pcrit_list` (the first run's
# candidate set) while plotting `pcrit_models_2`/`pcrit_model_df_2`;
# `pcrit_list_2` is the candidate set that matches this run — confirm the
# two sets are intended to differ.
for (id_i in pcrit_list_2) {
  # First recorded comment for this fish ("" when none was recorded).
  comment <- check_pcrit_df %>%
    dplyr::filter(id == id_i) %>%
    dplyr::slice(1) %>%
    dplyr::mutate(comment = if_else(is.na(comments), "", paste0("#", comments))) %>%
    pull(comment)
  # Single diagnostics row for this fish — replaces five separate
  # filter/mutate/pull pipelines over the same data frame.
  stats_i <- pcrit_model_df_2 %>%
    dplyr::filter(id == id_i)
  r2 <- round(stats_i$pcrit_r2, 3)
  conforming <- round(stats_i$pcrit_nb_mo2_conforming, 3)
  P <- round(stats_i$pcrit_p, 3)
  SMR <- round(stats_i$pcrit_SMR, 3)
  lowestMO2 <- round(stats_i$pcrit_lowestMO2, 3)
  # Generate and render the plot
  plotO2crit(o2critobj = pcrit_models_2[[id_i]])
  # Title: fish id plus comment
  mtext(text = paste0(id_i, " ", comment), side = 3, line = 2, adj = 0, col = "blue",
        font = 2, cex = 1.2)
  # Sub-title: model diagnostics
  mtext(text = paste0("R2 = ", r2, "; p = ", P, "; CP < SMR = ", conforming, "; SMR = ",
        SMR, "; lowestMO2 = ", lowestMO2), side = 3, line = 1, adj = 0, col = "blue",
        font = 1, cex = 0.8)
}
# NOTE(review): this vector duplicates `do_have_pcirt_2` defined just below
# and is overwritten there by a filtered data frame; only its length is used
# here. Consider removing one of the two definitions.
have_pcirt_2 <- c("a_9_21nov_3", "b_0_24_nov_1", "b_0_24nov_2", "b_0_25nov_1", "b_0_25nov_3",
"b_0_26_1", "b_9_21nov_1", "b_9_21nov_2", "b_9_21nov_3", "d_0_21nov_3")
length(have_pcirt_2)
## [1] 10
Based on visual checks the following fish do have clear Pcrit values
# Fish judged (by visual inspection) to show a clear Pcrit under the
# Chabot-SMR run.
# NOTE(review): several ids differ from the first run's list only by
# underscores (e.g. "b_0_24_nov_1" vs "b_0_24nov_1", "b_0_26_1" vs
# "b_0_26_nov_1") — verify against the raw id values; mismatches silently
# drop fish from the filter below.
do_have_pcirt_2 <- c("a_9_21nov_3", "b_0_24_nov_1", "b_0_24nov_2", "b_0_25nov_1",
"b_0_25nov_3", "b_0_26_1", "b_9_21nov_1", "b_9_21nov_2", "b_9_21nov_3", "d_0_21nov_3")
n_pcrit <- length(do_have_pcirt_2)
have_pcirt_2 <- pcrit_model_df_2 %>%
  dplyr::filter(id %in% do_have_pcirt_2)
# One summarise() call replaces the original three separate
# reframe()/pull() pipelines over the same column.
pcrit_summary_2 <- have_pcirt_2 %>%
  dplyr::summarise(mean = mean(pcrit_vaule),
                   min = min(pcrit_vaule),
                   max = max(pcrit_vaule))
mean_pcrit <- round(pcrit_summary_2$mean, 2)
min_pcrit <- round(pcrit_summary_2$min, 2)
max_pcrit <- round(pcrit_summary_2$max, 2)
print(paste0("There are ", n_pcrit, " fish with identified Pcrits and the mean Pcrit is ",
mean_pcrit, " (range: ", min_pcrit, "–", max_pcrit, ")"))
## [1] "There are 10 fish with identified Pcrits and the mean Pcrit is 26.92 (range: 17.9–39.9)"
Here, using the fish that were determined to have Pcrits, we will
also estimate Pcrit with five popular techniques for Pcrit calculation:
the traditional breakpoint metric (broken stick regression), the
nonlinear regression metric (Marshall et al. 2013), the sub-prediction
interval metric (Birk et al. 2019), the alpha-based Pcrit method (Seibel
et al. 2021), and the linear low O2 (LLO) method (Reemeyer & Rees
2019).
The function is called calc_pcrit() and is part of the respirometry
package.
Link: https://search.r-project.org/CRAN/refmans/respirometry/html/calc_pcrit.html
Parameters to consider
avg_top_n: for alpha method, a numeric value
representing the number of top α0 (MO2/PO2) values to average together
to estimate α. Default is 1. We recommend no more than 3 to avoid
diminishing the α value with sub-maximal observations.
level: for Sub_PI method, Percentage at which
the prediction interval should be constructed.
iqr: Only for Sub_PI. Removes mo2 observations
that are this many interquartile ranges away from the mean value for the
oxyregulating portion of the trial. If this filtering is not desired,
set to infinity.
NLR_m: only applies to NLR. Pcrit is defined as
the PO2 at which the slope of the best fitting function equals NLR_m
(after the MO2 data are normalized to the 90% quantile). Default is
0.065
MR: A numeric value for the metabolic rate at
which pcrit_alpha and pcrit_LLO should be returned. If not supplied by
the user, then the mean MO2 of the “oxyregulating” portion of the curve
is applied for pcrit_alpha and NA is returned for pcrit_LLO.
mo2_threshold: A single numeric value above which mo2 values are ignored for alpha Pcrit estimation. Useful to removing obviously erroneous values. Default is Inf.
We will use only those fish that may have a Pcrit. We will also need to weight
the SMR values in some way. In this case there is no easy way to weight
the models themselves, so instead we can just take an average of the SMR
values and use only that.
# Build the input data for the alternative Pcrit methods: collapse the
# repeated SMR (normoxic) measures to a single averaged point per fish,
# then append the closed-respirometry measurements unchanged.
pcrit_check_smr_df <- slope_tidy %>%
  dplyr::filter(id %in% pcrit_list & phase == "smr") %>%
  dplyr::group_by(id) %>%
  # summarise() is the idiomatic one-row-per-group verb here (reframe() is
  # intended for multi-row results); .groups = "drop" returns it ungrouped.
  dplyr::summarise(DO = mean(DO, na.rm = TRUE), MO2 = SMR[1], SMR = SMR[1],
                   .groups = "drop")
pcrit_check_closed_df <- slope_tidy %>%
  dplyr::filter(id %in% pcrit_list & phase != "smr") %>%
  dplyr::select(id, DO, MO2, SMR)
# bind_rows() matches columns by name, unlike positional rbind().
pcrit_check_df <- dplyr::bind_rows(pcrit_check_smr_df, pcrit_check_closed_df)
# Number of distinct candidate fish.
pcrit_check_df_n <- dplyr::n_distinct(pcrit_check_df$id)
paste0("n for possible Pcrit = ", pcrit_check_df_n)
## [1] "n for possible Pcrit = 14"
Here we build the models
# Fit every Pcrit method (alpha, breakpoint, LLO, NLR, Sub_PI) per animal.
# calc_pcrit(method = 'All') returns a named numeric vector (one element per
# method); we reshape it into a one-row data frame keyed by id. tryCatch()
# skips animals whose data cannot support a fit instead of aborting the loop.
combined_pcirt_list <- list()
for (id_i in pcrit_list) {
  id_name <- id_i
  mo2_data <- pcrit_check_df %>%
    dplyr::filter(id == id_i)
  # Single averaged SMR supplied as the metabolic rate for the alpha and
  # LLO methods (see note above about weighting SMR values).
  MR_set <- mo2_data$SMR[1] %>% as.numeric()
  # Use tryCatch to handle errors and skip problematic calculations
  # (the redundant inner `pcrit_df <-` assignment was removed: tryCatch
  # already returns the value of its expression).
  pcrit_df <- tryCatch({
    calc_pcrit(po2 = mo2_data$DO,
               mo2 = mo2_data$MO2,
               method = 'All',
               avg_top_n = 2, # alpha metric (default = 1) recommend no more than 3
               level = 0.95, # Sub_PI metric (default = 0.95)
               iqr = 1.5, # Sub_PI metric (default = 1.5)
               NLR_m = 0.065, # NLR metric (default = 0.065)
               MR = MR_set, # alpha and LLO metrics,
               mo2_threshold = Inf, # alpha metric
               return_models = FALSE # return model parameters?
               ) %>%
      as.data.frame() %>%
      # Namespace-qualified for consistency with the rest of the chunk and
      # to avoid masking among the many attached packages: this helper
      # comes from datawizard, and rename() from dplyr.
      datawizard::rownames_as_column(var = "method") %>%
      dplyr::rename(value = ".") %>%
      tidyr::pivot_wider(.,
                         names_from = method,
                         values_from = value) %>%
      dplyr::mutate(id = id_name) %>%
      dplyr::select(id, everything())
  }, error = function(e) {
    message("Skipping channel ", id_name, " due to error: ", conditionMessage(e))
    NULL
  })
  # Only add to list if pcrit_df is not NULL (i.e. the fit succeeded)
  if (!is.null(pcrit_df)) {
    combined_pcirt_list[[id_name]] <- pcrit_df
  }
}
Combine all the Pcrit model estimates together
# One row per animal, one column per Pcrit method.
pcirt <- bind_rows(combined_pcirt_list)
# Ids of the animals that produced a Pcrit fit. Note: pull() with no
# argument extracts the LAST column — here a method estimate, not the id,
# because select(id, everything()) put id first — so name it explicitly.
id_s_comp <- pcirt %>%
  dplyr::pull(id)
Here we will save the plots for the various Pcrit curves.
# Create output directory if needed. recursive = TRUE also creates any
# missing parent directories (plain dir.create() errors if the parent does
# not exist); showWarnings = FALSE silences the benign already-exists case.
output_fig_pcrit_alternative_wd <- file.path(output_fig_wd, "pcrit-alternative")
if (!dir.exists(output_fig_pcrit_alternative_wd)) {
  dir.create(output_fig_pcrit_alternative_wd,
             recursive = TRUE, showWarnings = FALSE)
}
# Open a single PDF device once; each animal gets its own page.
pdf(file = file.path(output_fig_pcrit_alternative_wd, "combined_pcrit_plots.pdf"),
    width = 8, height = 6)
for (id_i in pcrit_list) {
  id_name <- id_i
  mo2_data <- pcrit_check_df %>%
    dplyr::filter(id == id_i)
  MR_set <- mo2_data$SMR[1] %>% as.numeric()
  tryCatch({
    # Generate and render the plot with the SAME settings used for the
    # calc_pcrit() estimates above (avg_top_n = 2; previously 1 here),
    # so the saved figures depict the models actually reported.
    plot_pcrit(
      po2 = mo2_data$DO,
      mo2 = mo2_data$MO2,
      method = 'All',
      avg_top_n = 2,
      level = 0.95,
      iqr = 1.5,
      NLR_m = 0.065,
      MR = MR_set,
      mo2_threshold = Inf,
      return_models = FALSE,
      showNLRs = FALSE
    )
    # Add the animal id as a title in the top-left corner
    mtext(text = paste(id_name),
          side = 3, line = 2, adj = 0, # Top margin, aligned to left
          col = "blue", font = 2, cex = 1.2)
  }, error = function(e) {
    message("Skipping channel ", id_name, " due to error: ", conditionMessage(e))
  })
}
# Close the PDF device *after* the loop
dev.off()
## png
## 2
Plotting in the html. None of the models appear to estimate a
Pcrit value convincingly.
# Render the same per-animal Pcrit plots inline in the html output.
for (id_i in pcrit_list) {
  id_name <- id_i
  mo2_data <- pcrit_check_df %>%
    dplyr::filter(id == id_i)
  MR_set <- mo2_data$SMR[1] %>% as.numeric()
  tryCatch({
    # Generate and render the plot with the SAME settings used for the
    # calc_pcrit() estimates above (avg_top_n = 2; previously 1 here),
    # so the displayed curves match the reported models.
    plot_pcrit(
      po2 = mo2_data$DO,
      mo2 = mo2_data$MO2,
      method = 'All',
      avg_top_n = 2,
      level = 0.95,
      iqr = 1.5,
      NLR_m = 0.065,
      MR = MR_set,
      mo2_threshold = Inf,
      return_models = FALSE,
      showNLRs = FALSE
    )
    # Add the animal id as a title in the top-left corner
    mtext(text = paste(id_name),
          side = 3, line = 2, adj = 0, # Top margin, aligned to left
          col = "blue", font = 2, cex = 1.2)
  }, error = function(e) {
    message("Skipping channel ", id_name, " due to error: ", conditionMessage(e))
  })
}